[llvm] r358958 - [LSR] Limit the recursion for setup cost

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 01:52:21 PDT 2019


Author: dmgreen
Date: Tue Apr 23 01:52:21 2019
New Revision: 358958

URL: http://llvm.org/viewvc/llvm-project?rev=358958&view=rev
Log:
[LSR] Limit the recursion for setup cost

In some circumstances we can end up with setup costs that are very complex to
compute, even though the scevs are not very complex to create. This can also
lead to setupcosts that are calculated to be exactly -1, which LSR treats as an
invalid cost. This patch puts a limit on the recursion depth for setup cost to
prevent them taking too long.

Thanks to @reames for the report and test case.

Differential Revision: https://reviews.llvm.org/D60944

Added:
    llvm/trunk/test/Transforms/LoopStrengthReduce/gnarly-setupcost.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll
    llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=358958&r1=358957&r2=358958&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Tue Apr 23 01:52:21 2019
@@ -164,9 +164,9 @@ static cl::opt<unsigned> ComplexityLimit
   cl::init(std::numeric_limits<uint16_t>::max()),
   cl::desc("LSR search space complexity limit"));
 
-static cl::opt<bool> EnableRecursiveSetupCost(
-  "lsr-recursive-setupcost", cl::Hidden, cl::init(true),
-  cl::desc("Enable more thorough lsr setup cost calculation"));
+static cl::opt<unsigned> SetupCostDepthLimit(
+    "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
+    cl::desc("The limit on recursion depth for LSRs setup cost"));
 
 #ifndef NDEBUG
 // Stress test IV chain generation.
@@ -1212,22 +1212,23 @@ static bool isAMCompletelyFolded(const T
                                  bool HasBaseReg, int64_t Scale,
                                  Instruction *Fixup = nullptr);
 
-static unsigned getSetupCost(const SCEV *Reg) {
+static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
   if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
     return 1;
-  if (!EnableRecursiveSetupCost)
+  if (Depth == 0)
     return 0;
   if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
-    return getSetupCost(S->getStart());
+    return getSetupCost(S->getStart(), Depth - 1);
   if (auto S = dyn_cast<SCEVCastExpr>(Reg))
-    return getSetupCost(S->getOperand());
+    return getSetupCost(S->getOperand(), Depth - 1);
   if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
     return std::accumulate(S->op_begin(), S->op_end(), 0,
-                           [](unsigned i, const SCEV *Reg) {
-                             return i + getSetupCost(Reg);
+                           [&](unsigned i, const SCEV *Reg) {
+                             return i + getSetupCost(Reg, Depth - 1);
                            });
   if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
-    return getSetupCost(S->getLHS()) + getSetupCost(S->getRHS());
+    return getSetupCost(S->getLHS(), Depth - 1) +
+           getSetupCost(S->getRHS(), Depth - 1);
   return 0;
 }
 
@@ -1293,7 +1294,9 @@ void Cost::RateRegister(const Formula &F
 
   // Rough heuristic; favor registers which don't require extra setup
   // instructions in the preheader.
-  C.SetupCost += getSetupCost(Reg);
+  C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
+  // Ensure we don't, even with the recusion limit, produce invalid costs.
+  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
 
   C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
                SE->hasComputableLoopEvolution(Reg, L);

Modified: llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll?rev=358958&r1=358957&r2=358958&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll Tue Apr 23 01:52:21 2019
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -rdf-opt=0 -disable-hexagon-misched -hexagon-initial-cfg-cleanup=0 -lsr-recursive-setupcost=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -rdf-opt=0 -disable-hexagon-misched -hexagon-initial-cfg-cleanup=0 -lsr-setupcost-depth-limit=1 < %s | FileCheck %s
 
 ; Test that we generate the correct code when a loop carried value
 ; is scheduled one stage earlier than it's use. The code in

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/gnarly-setupcost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/gnarly-setupcost.ll?rev=358958&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/gnarly-setupcost.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/gnarly-setupcost.ll Tue Apr 23 01:52:21 2019
@@ -0,0 +1,93 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+
+; Check this completes sensibly. The sequence here is quick for scev to
+; calculate, but creates a very large tree if recursed into, the same node
+; being processed many times. I will also naturally have a setupcost of
+; 0xffffffff, which LSR will treat as invalid.
+; CHECK-LABEL: func
+; CHECK: load i32, i32* %gep
+
+define i32 @func(i32* %in) {
+entry:
+  %load = load i32, i32* %in, align 4
+  %a1 = add i32 %load, 1
+  %m1 = mul i32 %a1, %load
+  %a2 = add i32 %m1, 1
+  %m2 = mul i32 %a2, %m1
+  %a3 = add i32 %m2, 1
+  %m3 = mul i32 %a3, %m2
+  %a4 = add i32 %m3, 1
+  %m4 = mul i32 %a4, %m3
+  %a5 = add i32 %m4, 1
+  %m5 = mul i32 %a5, %m4
+  %a6 = add i32 %m5, 1
+  %m6 = mul i32 %a6, %m5
+  %a7 = add i32 %m6, 1
+  %m7 = mul i32 %a7, %m6
+  %a8 = add i32 %m7, 1
+  %m8 = mul i32 %a8, %m7
+  %a9 = add i32 %m8, 1
+  %m9 = mul i32 %a9, %m8
+  %a10 = add i32 %m9, 1
+  %m10 = mul i32 %a10, %m9
+  %a11 = add i32 %m10, 1
+  %m11 = mul i32 %a11, %m10
+  %a12 = add i32 %m11, 1
+  %m12 = mul i32 %a12, %m11
+  %a13 = add i32 %m12, 1
+  %m13 = mul i32 %a13, %m12
+  %a14 = add i32 %m13, 1
+  %m14 = mul i32 %a14, %m13
+  %a15 = add i32 %m14, 1
+  %m15 = mul i32 %a15, %m14
+  %a16 = add i32 %m15, 1
+  %m16 = mul i32 %a16, %m15
+  %a17 = add i32 %m16, 1
+  %m17 = mul i32 %a17, %m16
+  %a18 = add i32 %m17, 1
+  %m18 = mul i32 %a18, %m17
+  %a19 = add i32 %m18, 1
+  %m19 = mul i32 %a19, %m18
+  %a20 = add i32 %m19, 1
+  %m20 = mul i32 %a20, %m19
+  %a21 = add i32 %m20, 1
+  %m21 = mul i32 %a21, %m20
+  %a22 = add i32 %m21, 1
+  %m22 = mul i32 %a22, %m21
+  %a23 = add i32 %m22, 1
+  %m23 = mul i32 %a23, %m22
+  %a24 = add i32 %m23, 1
+  %m24 = mul i32 %a24, %m23
+  %a25 = add i32 %m24, 1
+  %m25 = mul i32 %a25, %m24
+  %a26 = add i32 %m25, 1
+  %m26 = mul i32 %a26, %m25
+  %a27 = add i32 %m26, 1
+  %m27 = mul i32 %a27, %m26
+  %a28 = add i32 %m27, 1
+  %m28 = mul i32 %a28, %m27
+  %a29 = add i32 %m28, 1
+  %m29 = mul i32 %a29, %m28
+  %a30 = add i32 %m29, 1
+  %m30 = mul i32 %a30, %m29
+  %a31 = add i32 %m30, 1
+  %m31 = mul i32 %a31, %m30
+  br label %loop
+
+loop:
+  %lp = phi i32 [ %m31, %entry ], [ %linc, %loop ]
+  %0 = sext i32 %lp to i64
+  %gep = getelementptr inbounds i32, i32* %in, i64 %0
+  %loopload = load i32, i32* %gep, align 4
+  store i32 0, i32* %gep, align 4
+  %linc = add i32 %lp, 1
+  %lcmp = icmp eq i32 %linc, 100
+  br i1 %lcmp, label %exit, label %loop
+
+exit:
+  %ll = phi i32 [ %loopload, %loop ]
+  ret i32 %ll
+}
+

Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll?rev=358958&r1=358957&r2=358958&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll Tue Apr 23 01:52:21 2019
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-reduce -lsr-recursive-setupcost=0 -S | FileCheck %s
+; RUN: opt < %s -loop-reduce -lsr-setupcost-depth-limit=1 -S | FileCheck %s
 
 ; This test is adapted from the n-body test of the LLVM test-suite: A bug in
 ; r345114 caused LSR to generate incorrect code. The test verifies that the




More information about the llvm-commits mailing list