[llvm] r346438 - [LoopInterchange] Support reductions across inner and outer loop.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 8 12:44:19 PST 2018


Author: fhahn
Date: Thu Nov  8 12:44:19 2018
New Revision: 346438

URL: http://llvm.org/viewvc/llvm-project?rev=346438&view=rev
Log:
[LoopInterchange] Support reductions across inner and outer loop.

This patch adds logic to detect reductions across the inner and outer
loop by following the incoming values of PHI nodes in the outer loop. If
the incoming values take part in a reduction in the inner loop or come
from outside the outer loop, we have found a reduction spanning the inner
and outer loops.

With this change, ~10% more loops are interchanged in the LLVM
test-suite + SPEC2006.

Fixes https://bugs.llvm.org/show_bug.cgi?id=30472

Reviewers: mcrosier, efriedma, karthikthecool, davide, hfinkel, dmgreen

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D43245

Added:
    llvm/trunk/test/Transforms/LoopInterchange/outer-only-reductions.ll
    llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp
    llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll
    llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp?rev=346438&r1=346437&r2=346438&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp Thu Nov  8 12:44:19 2018
@@ -339,10 +339,21 @@ public:
 
   bool currentLimitations();
 
+  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
+    return OuterInnerReductions;
+  }
+
 private:
   bool tightlyNested(Loop *Outer, Loop *Inner);
   bool containsUnsafeInstructions(BasicBlock *BB);
-  bool findInductions(Loop *L, SmallVector<PHINode *, 8> &Inductions);
+
+  /// Discover induction and reduction PHIs in the header of \p L. Induction
+  /// PHIs are added to \p Inductions, reductions are added to
+  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
+  /// to be passed as \p InnerLoop.
+  bool findInductionAndReductions(Loop *L,
+                                  SmallVector<PHINode *, 8> &Inductions,
+                                  Loop *InnerLoop);
 
   Loop *OuterLoop;
   Loop *InnerLoop;
@@ -352,6 +363,9 @@ private:
   /// Interface to emit optimization remarks.
   OptimizationRemarkEmitter *ORE;
 
+  /// Set of reduction PHIs taking part in a reduction across the inner and
+  /// outer loop.
+  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
 };
 
 /// LoopInterchangeProfitability checks if it is profitable to interchange the
@@ -384,9 +398,10 @@ class LoopInterchangeTransform {
 public:
   LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
                            LoopInfo *LI, DominatorTree *DT,
-                           BasicBlock *LoopNestExit)
+                           BasicBlock *LoopNestExit,
+                           const LoopInterchangeLegality &LIL)
       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
-        LoopExit(LoopNestExit) {}
+        LoopExit(LoopNestExit), LIL(LIL) {}
 
   /// Interchange OuterLoop and InnerLoop.
   bool transform();
@@ -411,6 +426,8 @@ private:
   LoopInfo *LI;
   DominatorTree *DT;
   BasicBlock *LoopExit;
+
+  const LoopInterchangeLegality &LIL;
 };
 
 // Main LoopInterchange Pass.
@@ -560,8 +577,8 @@ struct LoopInterchange : public LoopPass
              << "Loop interchanged with enclosing loop.";
     });
 
-    LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
-                                 LoopNestExit);
+    LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
+                                 LIL);
     LIT.transform();
     LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
     LoopsInterchanged++;
@@ -633,8 +650,36 @@ bool LoopInterchangeLegality::isLoopStru
   return true;
 }
 
-bool LoopInterchangeLegality::findInductions(
-    Loop *L, SmallVector<PHINode *, 8> &Inductions) {
+// If SV is a LCSSA PHI node with a single incoming value, return the incoming
+// value.
+static Value *followLCSSA(Value *SV) {
+  PHINode *PHI = dyn_cast<PHINode>(SV);
+  if (!PHI)
+    return SV;
+
+  if (PHI->getNumIncomingValues() != 1)
+    return SV;
+  return followLCSSA(PHI->getIncomingValue(0));
+}
+
+// Check V's users to see if it is involved in a reduction in L.
+static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
+  for (Value *User : V->users()) {
+    if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+      if (PHI->getNumIncomingValues() == 1)
+        continue;
+      RecurrenceDescriptor RD;
+      if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
+        return PHI;
+      return nullptr;
+    }
+  }
+
+  return nullptr;
+}
+
+bool LoopInterchangeLegality::findInductionAndReductions(
+    Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
   if (!L->getLoopLatch() || !L->getLoopPredecessor())
     return false;
   for (PHINode &PHI : L->getHeader()->phis()) {
@@ -643,8 +688,32 @@ bool LoopInterchangeLegality::findInduct
     if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
       Inductions.push_back(&PHI);
     else {
-      LLVM_DEBUG(dbgs() << "Failed to recognize PHI as an induction.\n");
-      return false;
+      // PHIs in inner loops need to be part of a reduction in the outer loop,
+      // discovered when checking the PHIs of the outer loop earlier.
+      if (!InnerLoop) {
+        if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) {
+          LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
+                               "across the outer loop.\n");
+          return false;
+        }
+      } else {
+        assert(PHI.getNumIncomingValues() == 2 &&
+               "Phis in loop header should have exactly 2 incoming values");
+        // Check if we have a PHI node in the outer loop that has a reduction
+        // result from the inner loop as an incoming value.
+        Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
+        PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
+        if (!InnerRedPhi ||
+            !llvm::any_of(InnerRedPhi->incoming_values(),
+                          [&PHI](Value *V) { return V == &PHI; })) {
+          LLVM_DEBUG(
+              dbgs()
+              << "Failed to recognize PHI as an induction or reduction.\n");
+          return false;
+        }
+        OuterInnerReductions.insert(&PHI);
+        OuterInnerReductions.insert(InnerRedPhi);
+      }
     }
   }
   return true;
@@ -693,63 +762,64 @@ bool LoopInterchangeLegality::currentLim
 
   PHINode *InnerInductionVar;
   SmallVector<PHINode *, 8> Inductions;
-  if (!findInductions(InnerLoop, Inductions)) {
+  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
     LLVM_DEBUG(
-        dbgs() << "Only inner loops with induction or reduction PHI nodes "
+        dbgs() << "Only outer loops with induction or reduction PHI nodes "
                << "are supported currently.\n");
     ORE->emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
-                                      InnerLoop->getStartLoc(),
-                                      InnerLoop->getHeader())
-             << "Only inner loops with induction or reduction PHI nodes can be"
-                " interchange currently.";
+      return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
+                                      OuterLoop->getStartLoc(),
+                                      OuterLoop->getHeader())
+             << "Only outer loops with induction or reduction PHI nodes can be"
+                " interchanged currently.";
     });
     return true;
   }
 
   // TODO: Currently we handle only loops with 1 induction variable.
   if (Inductions.size() != 1) {
-    LLVM_DEBUG(
-        dbgs() << "We currently only support loops with 1 induction variable."
-               << "Failed to interchange due to current limitation\n");
+    LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
+                      << "supported currently.\n");
     ORE->emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
-                                      InnerLoop->getStartLoc(),
-                                      InnerLoop->getHeader())
-             << "Only inner loops with 1 induction variable can be "
+      return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
+                                      OuterLoop->getStartLoc(),
+                                      OuterLoop->getHeader())
+             << "Only outer loops with 1 induction variable can be "
                 "interchanged currently.";
     });
     return true;
   }
 
-  InnerInductionVar = Inductions.pop_back_val();
-  if (!findInductions(OuterLoop, Inductions)) {
+  Inductions.clear();
+  if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
     LLVM_DEBUG(
-        dbgs() << "Only outer loops with induction or reduction PHI nodes "
+        dbgs() << "Only inner loops with induction or reduction PHI nodes "
                << "are supported currently.\n");
     ORE->emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
-                                      OuterLoop->getStartLoc(),
-                                      OuterLoop->getHeader())
-             << "Only outer loops with induction or reduction PHI nodes can be"
-                " interchanged currently.";
+      return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
+                                      InnerLoop->getStartLoc(),
+                                      InnerLoop->getHeader())
+             << "Only inner loops with induction or reduction PHI nodes can be"
+                " interchange currently.";
     });
     return true;
   }
 
   // TODO: Currently we handle only loops with 1 induction variable.
   if (Inductions.size() != 1) {
-    LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
-                      << "supported currently.\n");
+    LLVM_DEBUG(
+        dbgs() << "We currently only support loops with 1 induction variable."
+               << "Failed to interchange due to current limitation\n");
     ORE->emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
-                                      OuterLoop->getStartLoc(),
-                                      OuterLoop->getHeader())
-             << "Only outer loops with 1 induction variable can be "
+      return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
+                                      InnerLoop->getStartLoc(),
+                                      InnerLoop->getHeader())
+             << "Only inner loops with 1 induction variable can be "
                 "interchanged currently.";
     });
     return true;
   }
+  InnerInductionVar = Inductions.pop_back_val();
 
   // TODO: Triangular loops are not handled for now.
   if (!isLoopStructureUnderstood(InnerInductionVar)) {
@@ -1387,13 +1457,29 @@ bool LoopInterchangeTransform::adjustLoo
   // replaced by Inners'.
   updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
 
-  // Make sure we have no other PHIs.
-  auto InnerPhis = drop_begin(InnerLoopHeader->phis(), 1);
-  auto OuterPhis = drop_begin(OuterLoopHeader->phis(), 1);
-  (void) InnerPhis;
-  (void) OuterPhis;
-  assert(begin(InnerPhis) == end(InnerPhis) && "Unexpected PHIs in inner loop");
-  assert(begin(OuterPhis) == end(OuterPhis) && "Unexpected PHis in outer loop");
+  // Now update the reduction PHIs in the inner and outer loop headers.
+  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
+  for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
+    InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
+  for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
+    OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
+
+  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
+  (void)OuterInnerReductions;
+
+  // Now move the remaining reduction PHIs from outer to inner loop header and
+  // vice versa. The PHI nodes must be part of a reduction across the inner and
+  // outer loop and all that remains to do is to update the incoming blocks.
+  for (PHINode *PHI : OuterLoopPHIs) {
+    PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
+    assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
+           "Expected a reduction PHI node");
+  }
+  for (PHINode *PHI : InnerLoopPHIs) {
+    PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
+    assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
+           "Expected a reduction PHI node");
+  }
 
   // Update the incoming blocks for moved PHI nodes.
   updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);

Modified: llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll?rev=346438&r1=346437&r2=346438&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll Thu Nov  8 12:44:19 2018
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
-; RUN:     -verify-dom-info -verify-loop-info 2>&1 | FileCheck -check-prefix=IR %s
+; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
 ; RUN: FileCheck --input-file=%t %s
 
 ; Inner loop only reductions are not supported currently. See discussion at

Modified: llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll?rev=346438&r1=346437&r2=346438&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll Thu Nov  8 12:44:19 2018
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t
-; RUN: cat %t |  FileCheck --check-prefix REMARK %s
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t -S
+; RUN: FileCheck --input-file %t --check-prefix REMARK %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

Added: llvm/trunk/test/Transforms/LoopInterchange/outer-only-reductions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/outer-only-reductions.ll?rev=346438&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/outer-only-reductions.ll (added)
+++ llvm/trunk/test/Transforms/LoopInterchange/outer-only-reductions.ll Thu Nov  8 12:44:19 2018
@@ -0,0 +1,52 @@
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
+; RUN: FileCheck --input-file=%t %s
+
+; Outer loop only reductions are not supported currently.
+
+@A = common global [500 x [500 x i32]] zeroinitializer
+
+;; global X
+
+;;  for( int i=1;i<N;i++) {
+;;    for( int j=1;j<N;j++)
+;;      ;
+;;    X+=A[j][i];
+;;  }
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            UnsupportedPHI
+; CHECK-NEXT: Function:        reduction_01
+
+; IR-LABEL: @reduction_01(
+; IR-NOT: split
+
+define i32 @reduction_01(i32 %N) {
+entry:
+  br label %outer.header
+
+outer.header:                                  ; preds = %outer.inc, %entry
+  %indvars.iv18 = phi i64 [ %indvars.iv.next19, %outer.inc ], [ 1, %entry ]
+  %add15 = phi i32 [ 0, %entry ], [ %add, %outer.inc ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %outer.header
+  %indvars.iv = phi i64 [ 1, %outer.header ], [ %indvars.iv.next, %for.body3 ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %outer.inc, label %for.body3
+
+outer.inc:                     ; preds = %for.body3
+  %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18
+  %0 = load i32, i32* %arrayidx5
+  %add = add nsw i32 %add15, %0
+  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
+  %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32
+  %exitcond21 = icmp eq i32 %lftr.wideiv20, %N
+  br i1 %exitcond21, label %for.end8, label %outer.header
+
+for.end8:                                         ; preds = %outer.inc
+  ret i32 %add
+}

Added: llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll?rev=346438&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll Thu Nov  8 12:44:19 2018
@@ -0,0 +1,150 @@
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; REMARKS: --- !Passed
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Interchanged
+; REMARKS-NEXT: Function:        test1
+
+define i64 @test1([100 x [100 x i64]]* %Arr) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR2_PREHEADER:%.*]]
+; CHECK:       for1.header.preheader:
+; CHECK-NEXT:    br label [[FOR1_HEADER:%.*]]
+; CHECK:       for1.header:
+; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[SUM_INNER:%.*]] = phi i64 [ [[SUM_INC:%.*]], [[FOR1_INC]] ], [ [[SUM_OUTER:%.*]], [[FOR1_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR2_SPLIT1:%.*]]
+; CHECK:       for2.preheader:
+; CHECK-NEXT:    br label [[FOR2:%.*]]
+; CHECK:       for2:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_OUTER]] = phi i64 [ [[SUM_INC_LCSSA:%.*]], [[FOR2_SPLIT]] ], [ 0, [[FOR2_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR1_HEADER_PREHEADER]]
+; CHECK:       for2.split1:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT:    [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
+; CHECK-NEXT:    br label [[FOR1_INC]]
+; CHECK:       for2.split:
+; CHECK-NEXT:    [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXIT1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], 100
+; CHECK-NEXT:    br i1 [[EXIT1]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR2]]
+; CHECK:       for1.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
+; CHECK-NEXT:    [[EXIT2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT24]], 100
+; CHECK-NEXT:    br i1 [[EXIT2]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
+; CHECK:       for1.loopexit:
+; CHECK-NEXT:    [[SUM_INC_LCSSA2:%.*]] = phi i64 [ [[SUM_INC_LCSSA]], [[FOR2_SPLIT]] ]
+; CHECK-NEXT:    ret i64 [[SUM_INC_LCSSA2]]
+;
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %for1.inc, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for2, %for1.header
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
+  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+  %lv = load i64, i64* %arrayidx, align 4
+  %sum.inc = add i64 %sum.inner, %lv
+  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+  br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc:                                ; preds = %for2
+  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %exit2 = icmp eq i64 %indvars.iv.next24, 100
+  br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit:                                 ; preds = %for1.inc
+  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+  ret i64 %sum.inc.lcssa2
+}
+
+; In this test case, the inner reduction PHI %inner does not involve the outer
+; reduction PHI %sum.outer, so we must not interchange.
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            UnsupportedPHIOuter
+; REMARKS-NEXT: Function:        test2
+
+define i64 @test2([100 x [100 x i64]]* %Arr) {
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %for1.inc, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for2, %for1.header
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+  %inner = phi i64 [ %indvars.iv23, %for1.header ], [ %sum.inc, %for2 ]
+  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+  %lv = load i64, i64* %arrayidx, align 4
+  %sum.inc = add i64 %inner, %lv
+  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+  br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc:                                ; preds = %for2
+  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %exit2 = icmp eq i64 %indvars.iv.next24, 100
+  br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit:                                 ; preds = %for1.inc
+  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+  ret i64 %sum.inc.lcssa2
+}
+
+; Check that we do not interchange if there is an additional instruction
+; between the outer and inner reduction PHIs.
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            UnsupportedPHIOuter
+; REMARKS-NEXT: Function:        test3
+
+define i64 @test3([100 x [100 x i64]]* %Arr) {
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %for1.inc, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+  %so = add i64 %sum.outer, 10
+  br label %for2
+
+for2:                                        ; preds = %for2, %for1.header
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+  %sum.inner = phi i64 [ %so, %for1.header ], [ %sum.inc, %for2 ]
+  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+  %lv = load i64, i64* %arrayidx, align 4
+  %sum.inc = add i64 %sum.inner, %lv
+  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+  br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc:                                ; preds = %for2
+  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %exit2 = icmp eq i64 %indvars.iv.next24, 100
+  br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit:                                 ; preds = %for1.inc
+  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+  ret i64 %sum.inc.lcssa2
+}




More information about the llvm-commits mailing list