[llvm] 44edc6f - [SCEV] rewriteLoopExitValues(): even if have hard uses, still rewrite if cheap (PR44668)

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 25 12:07:21 PST 2020


Author: Roman Lebedev
Date: 2020-02-25T23:05:59+03:00
New Revision: 44edc6fd2c63b7db43e13cc8caf1fee79bebdb5f

URL: https://github.com/llvm/llvm-project/commit/44edc6fd2c63b7db43e13cc8caf1fee79bebdb5f
DIFF: https://github.com/llvm/llvm-project/commit/44edc6fd2c63b7db43e13cc8caf1fee79bebdb5f.diff

LOG: [SCEV] rewriteLoopExitValues(): even if have hard uses, still rewrite if cheap (PR44668)

Summary:
Replacing uses of the IV outside of the loop is likely generally useful,
but `rewriteLoopExitValues()` is cautious: if it isn't told to always
perform the replacement, and there are hard uses of the IV in the loop,
it doesn't perform the replacement.

In [[ https://bugs.llvm.org/show_bug.cgi?id=44668 | PR44668 ]],
that prevents `-indvars` from replacing uses of the induction variable
after the loop, which might be one of the optimization failures
preventing that code from being vectorized.

Instead, now that the cost model is fixed, I believe we should be
a little bit more optimistic, and also perform the replacement
if we believe it is within our budget.

Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=44668 | PR44668 ]].

Reviewers: reames, mkazantsev, asbirlea, fhahn, skatkov

Reviewed By: mkazantsev

Subscribers: nikic, hiraditya, zzheng, javed.absar, dmgreen, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73501

Added: 
    llvm/test/Transforms/IndVarSimplify/do-recompute-if-cheap.ll

Modified: 
    llvm/lib/Transforms/Utils/LoopUtils.cpp
    llvm/test/Transforms/IndVarSimplify/elim-extend.ll
    llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll
    llvm/test/Transforms/IndVarSimplify/pr28705.ll
    llvm/test/Transforms/IndVarSimplify/pr39673.ll

Removed: 
    llvm/test/Transforms/IndVarSimplify/dont-recompute.ll


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 347289e6f201..69020219d9d4 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1353,16 +1353,16 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
 
         // Computing the value outside of the loop brings no benefit if it is
         // definitely used inside the loop in a way which can not be optimized
-        // away. Avoid doing so unless we know we have a value which computes
-        // the ExitValue already. TODO: This should be merged into SCEV
-        // expander to leverage its knowledge of existing expressions.
-        if (ReplaceExitValue != AlwaysRepl &&
-            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
+        // away. Avoid doing so unless either we know we have a value
+        // which computes the ExitValue already, or it is cheap to do so.
+        // TODO: This should be merged into SCEV expander to leverage
+        // its knowledge of existing expressions.
+        bool HighCost = Rewriter.isHighCostExpansion(
+            ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
+        if (ReplaceExitValue != AlwaysRepl && HighCost &&
             hasHardUserWithinLoop(L, Inst))
           continue;
 
-        bool HighCost = Rewriter.isHighCostExpansion(
-            ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
 
         LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "

diff  --git a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll b/llvm/test/Transforms/IndVarSimplify/do-recompute-if-cheap.ll
similarity index 88%
rename from llvm/test/Transforms/IndVarSimplify/dont-recompute.ll
rename to llvm/test/Transforms/IndVarSimplify/do-recompute-if-cheap.ll
index e31c5827123b..869f55e1d6dc 100644
--- a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll
+++ b/llvm/test/Transforms/IndVarSimplify/do-recompute-if-cheap.ll
@@ -1,10 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -indvars -S | FileCheck %s
 
-; This tests that the IV is not recomputed outside of the loop when it is known
-; to be computed by the loop and used in the loop any way. In the example below
-; although a's value can be computed outside of the loop, there is no benefit
-; in doing so as it has to be computed by the loop anyway.
+; This tests that the IV is recomputed outside of the loop even when it is known
+; to be computed by the loop and used in the loop any way, if it is cheap to do
+; so. In the example below the value can be computed outside of the loop,
+; and we should do so because after that IV is no longer used outside of
+; the loop, which is likely beneficial for vectorization.
 ;
 ; extern void func(unsigned val);
 ;
@@ -35,8 +36,8 @@ define void @test(i32 %m) nounwind uwtable {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
-; CHECK-NEXT:    tail call void @func(i32 [[ADD_LCSSA]])
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[M]], 186
+; CHECK-NEXT:    tail call void @func(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -69,8 +70,8 @@ define i32 @test2(i32 %m) nounwind uwtable {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
-; CHECK-NEXT:    ret i32 [[ADD_LCSSA]]
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[M]], 186
+; CHECK-NEXT:    ret i32 [[TMP0]]
 ;
 entry:
   br label %for.body
@@ -101,8 +102,8 @@ define void @test3(i32 %m) nounwind uwtable {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
-; CHECK-NEXT:    tail call void @func(i32 [[ADD_LCSSA]])
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[M]], 186
+; CHECK-NEXT:    tail call void @func(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -141,8 +142,8 @@ define void @test4(i32 %m) nounwind uwtable {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[SOFT_USE:%.*]] = add i32 [[ADD_LCSSA]], 123
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[M]], 186
+; CHECK-NEXT:    [[SOFT_USE:%.*]] = add i32 [[TMP0]], 123
 ; CHECK-NEXT:    tail call void @func(i32 [[SOFT_USE]])
 ; CHECK-NEXT:    ret void
 ;
@@ -178,8 +179,8 @@ define void @test5(i32 %m) nounwind uwtable {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
-; CHECK-NEXT:    tail call void @func(i32 [[ADD_LCSSA]])
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[M]], 186
+; CHECK-NEXT:    tail call void @func(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -215,8 +216,8 @@ define void @test6(i32 %m, i32* %p) nounwind uwtable {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
-; CHECK-NEXT:    tail call void @func(i32 [[ADD_LCSSA]])
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[M]], 186
+; CHECK-NEXT:    tail call void @func(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
index 9de9ac2a1e07..7e245933c331 100644
--- a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -143,8 +143,7 @@ define void @nestedIV(i8* %address, i32 %limit) nounwind {
 ; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    br i1 [[INNERCMP]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
 ; CHECK:       innerexit:
-; CHECK-NEXT:    [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP0]] to i32
 ; CHECK-NEXT:    br label [[OUTERMERGE]]
 ; CHECK:       outermerge:
 ; CHECK-NEXT:    [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP4]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ]

diff  --git a/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll b/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll
index 6ef7a5ce9e26..5fc2b3068f19 100644
--- a/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll
@@ -26,8 +26,7 @@ define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp
 ; CHECK-NEXT:    [[TMP23:%.*]] = icmp slt i32 [[TMP22]], [[TMP14]]
 ; CHECK-NEXT:    br i1 [[TMP23]], label [[NOT_ZERO11]], label [[MAIN_EXIT_SELECTOR:%.*]]
 ; CHECK:       main.exit.selector:
-; CHECK-NEXT:    [[TMP22_LCSSA:%.*]] = phi i32 [ [[TMP22]], [[NOT_ZERO11]] ]
-; CHECK-NEXT:    [[TMP24:%.*]] = icmp slt i32 [[TMP22_LCSSA]], [[LENGTH_I]]
+; CHECK-NEXT:    [[TMP24:%.*]] = icmp slt i32 [[TMP14]], [[LENGTH_I]]
 ; CHECK-NEXT:    br i1 [[TMP24]], label [[NOT_ZERO11_POSTLOOP]], label [[LEAVE:%.*]]
 ; CHECK:       leave:
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/IndVarSimplify/pr28705.ll b/llvm/test/Transforms/IndVarSimplify/pr28705.ll
index a6fed805dc65..b431a28a4888 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr28705.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr28705.ll
@@ -16,14 +16,14 @@ define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr {
 ; CHECK:       for.body650.lr.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY650:%.*]]
 ; CHECK:       loopexit:
-; CHECK-NEXT:    [[INC_I_I_LCSSA:%.*]] = phi i32 [ [[INC_I_I:%.*]], [[FOR_BODY650]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[DOTSROA_SPECULATED]], 1
 ; CHECK-NEXT:    br label [[XZ_EXIT]]
 ; CHECK:       XZ.exit:
-; CHECK-NEXT:    [[DB_SROA_9_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC_I_I_LCSSA]], [[LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[DB_SROA_9_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP0]], [[LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       for.body650:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[FOR_BODY650_LR_PH]] ], [ [[INC655:%.*]], [[FOR_BODY650]] ]
-; CHECK-NEXT:    [[IV2:%.*]] = phi i32 [ 1, [[FOR_BODY650_LR_PH]] ], [ [[INC_I_I]], [[FOR_BODY650]] ]
+; CHECK-NEXT:    [[IV2:%.*]] = phi i32 [ 1, [[FOR_BODY650_LR_PH]] ], [ [[INC_I_I:%.*]], [[FOR_BODY650]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_I_I1105:%.*]] = getelementptr inbounds i8, i8* [[REF_I1174:%.*]], i32 [[IV2]]
 ; CHECK-NEXT:    store i8 7, i8* [[ARRAYIDX_I_I1105]], align 1
 ; CHECK-NEXT:    [[INC_I_I]] = add nuw nsw i32 [[IV2]], 1

diff  --git a/llvm/test/Transforms/IndVarSimplify/pr39673.ll b/llvm/test/Transforms/IndVarSimplify/pr39673.ll
index 7fb90a90071a..4ae9f4532d50 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr39673.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr39673.ll
@@ -72,8 +72,8 @@ define i16 @dom_argument(i16 %arg1, i16 %arg2) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
 ; CHECK:       loop2.end:
-; CHECK-NEXT:    [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
-; CHECK-NEXT:    ret i16 [[K2_ADD_LCSSA]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[ARG2]], 2
+; CHECK-NEXT:    ret i16 [[TMP0]]
 ;
 entry:
   br label %loop1
@@ -121,8 +121,8 @@ define i16 @dummy_phi_outside_loop(i16 %arg) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
 ; CHECK:       loop2.end:
-; CHECK-NEXT:    [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
-; CHECK-NEXT:    ret i16 [[K2_ADD_LCSSA]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[DUMMY]], 2
+; CHECK-NEXT:    ret i16 [[TMP0]]
 ;
 entry:
   br label %loop2.preheader
@@ -166,8 +166,8 @@ define i16 @neg_loop_carried(i16 %arg) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
 ; CHECK:       loop2.end:
-; CHECK-NEXT:    [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
-; CHECK-NEXT:    ret i16 [[K2_ADD_LCSSA]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[TMP0]], 2
+; CHECK-NEXT:    ret i16 [[TMP1]]
 ;
 entry:
   br label %loop1


        


More information about the llvm-commits mailing list