[llvm] r331410 - [PowerPC] No CTR loop if the candidate exiting block is in a different loop

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Wed May 2 15:56:04 PDT 2018


Author: nemanjai
Date: Wed May  2 15:56:04 2018
New Revision: 331410

URL: http://llvm.org/viewvc/llvm-project?rev=331410&view=rev
Log:
[PowerPC] No CTR loop if the candidate exiting block is in a different loop

The CTR loops pass will insert the decrementing branch instruction in an exiting
block for the loop being transformed. However, if that block is part of another
loop as well (whether a nested loop or with irreducible CFG), it is not valid
to use that exiting block. In fact, if the loop has irreducible CFG, we don't
bother analyzing it and we just bail on the transformation. In practice, this
doesn't lead to a noticeable reduction in the number of loops transformed by
this pass.

Fixes https://bugs.llvm.org/show_bug.cgi?id=37229

Differential Revision: https://reviews.llvm.org/D46162

Added:
    llvm/trunk/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp?rev=331410&r1=331409&r2=331410&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp Wed May  2 15:56:04 2018
@@ -30,8 +30,10 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -512,6 +514,12 @@ bool PPCCTRLoops::convertToCTRLoop(Loop
   if (MadeChange)
     return MadeChange;
 
+  // Bail out if the loop has irreducible control flow.
+  LoopBlocksRPO RPOT(L);
+  RPOT.perform(LI);
+  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
+    return false;
+
 #ifndef NDEBUG
   // Stop trying after reaching the limit (if any).
   int Limit = CTRLoopLimit;
@@ -572,6 +580,12 @@ bool PPCCTRLoops::convertToCTRLoop(Loop
     if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
       continue;
 
+    // If this exiting block is contained in a nested loop, it is not eligible
+    // for insertion of the branch-and-decrement since the inner loop would
+    // end up messing up the value in the CTR.
+    if (LI->getLoopFor(*I) != L)
+      continue;
+
     // We now have a loop-invariant count of loop iterations (which is not the
     // constant zero) for which we know that this loop will not exit via this
     // exisiting block.

Added: llvm/trunk/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll?rev=331410&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll Wed May  2 15:56:04 2018
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+define signext i32 @test(i32* noalias %PtrA, i32* noalias %PtrB, i32 signext %LenA, i32 signext %LenB) #0 {
+; CHECK-LABEL: test:
+; CHECK-NOT: mtctr
+; CHECK-NOT: bdnz
+; CHECK-NOT: bdz
+; CHECK:     blr
+entry:
+  br label %block2
+
+block2:                                           ; preds = %entry
+  br label %block3
+
+block3:                                           ; preds = %block8, %block2
+  %OuterInd.0 = phi i32 [ 0, %block2 ], [ %inc, %block8 ]
+  %InnerInd.0 = phi i32 [ 0, %block2 ], [ %inc1, %block8 ]
+  %inc = add nsw i32 %OuterInd.0, 1
+  br label %block4
+
+block4:                                           ; preds = %if.then4, %block3
+  %InnerInd.1 = phi i32 [ %InnerInd.0, %block3 ], [ %inc1, %if.then4 ]
+  %cmp = icmp sge i32 %inc, %LenA
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %block4
+  %sub = sub nsw i32 %inc, 1
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i32, i32* %PtrA, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  ret i32 %0
+
+if.end:                                           ; preds = %block4
+  br label %block5
+
+block5:                                           ; preds = %if.end
+  %inc1 = add nsw i32 %InnerInd.1, 1
+  %idxprom2 = sext i32 %InnerInd.1 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %PtrB, i64 %idxprom2
+  %1 = load i32, i32* %arrayidx3, align 4
+  %tobool = icmp ne i32 %1, 0
+  br i1 %tobool, label %if.then4, label %if.end9
+
+if.then4:                                         ; preds = %block5
+  %idxprom5 = sext i32 %inc to i64
+  %arrayidx6 = getelementptr inbounds i32, i32* %PtrA, i64 %idxprom5
+  %2 = load i32, i32* %arrayidx6, align 4
+  %idxprom7 = sext i32 %inc1 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %PtrB, i64 %idxprom7
+  store i32 %2, i32* %arrayidx8, align 4
+  br label %block4
+
+if.end9:                                          ; preds = %block5
+  br label %block6
+
+block6:                                           ; preds = %if.end9
+  %idxprom10 = sext i32 %inc to i64
+  %arrayidx11 = getelementptr inbounds i32, i32* %PtrA, i64 %idxprom10
+  %3 = load i32, i32* %arrayidx11, align 4
+  %inc12 = add nsw i32 %3, 1
+  store i32 %inc12, i32* %arrayidx11, align 4
+  br label %block8
+
+block8:                                           ; preds = %block6
+  br label %block3
+}




More information about the llvm-commits mailing list