[llvm] 10da984 - [LSR] Drop LSR solution if it is less profitable than baseline

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 27 10:14:03 PDT 2022


Author: eopXD
Date: 2022-10-27T10:13:57-07:00
New Revision: 10da9844d072df41d3eeeb6f104e2160fc7a2193

URL: https://github.com/llvm/llvm-project/commit/10da9844d072df41d3eeeb6f104e2160fc7a2193
DIFF: https://github.com/llvm/llvm-project/commit/10da9844d072df41d3eeeb6f104e2160fc7a2193.diff

LOG: [LSR] Drop LSR solution if it is less profitable than baseline

LSR may suggest a less profitable transformation of the loop. This
patch adds a check to prevent LSR from generating worse code than what
we already have.

Since LSR affects nearly all targets, the patch is guarded by the
option 'lsr-drop-solution', which is disabled by default for now.

The next step should be extending the TTI interface to allow target(s)
to enable this enhancement.

A debug message is added to tell the user when the LSR solution is
dropped, or that it could be dropped by enabling the option.

Reviewed By: Meinersbur, #loopoptwg

Differential Revision: https://reviews.llvm.org/D126043

Added: 
    llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 07175a5fda9e3..f9df7ba54b334 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -191,6 +191,10 @@ static cl::opt<bool> AllowTerminatingConditionFoldingAfterLSR(
     "lsr-term-fold", cl::Hidden, cl::init(false),
     cl::desc("Attempt to replace primary IV with other IV."));
 
+static cl::opt<bool> AllowDropSolutionIfLessProfitable(
+    "lsr-drop-solution", cl::Hidden, cl::init(false),
+    cl::desc("Attempt to drop solution if it is less profitable"));
+
 STATISTIC(NumTermFold,
           "Number of terminating condition fold recognized and performed");
 
@@ -1975,6 +1979,10 @@ class LSRInstance {
   /// SmallDenseSet.
   SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
 
+  /// Cost of the loop's current SCEV expressions (the baseline). LSR's best
+  /// solution is dropped if it is less profitable than this baseline.
+  Cost BaselineCost;
+
   /// Interesting use types, to facilitate truncation reuse.
   SmallSetVector<Type *, 4> Types;
 
@@ -3294,6 +3302,11 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
   BranchInst *ExitBranch = nullptr;
   bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
 
+  // For calculating baseline cost
+  SmallPtrSet<const SCEV *, 16> Regs;
+  DenseSet<const SCEV *> VisitedRegs;
+  DenseSet<size_t> VisitedLSRUse;
+
   for (const IVStrideUse &U : IU) {
     Instruction *UserInst = U.getUser();
     // Skip IV users that are part of profitable IV Chains.
@@ -3387,6 +3400,14 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LF.Offset = Offset;
     LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
 
+    // Create SCEV as Formula for calculating baseline cost
+    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
+      Formula F;
+      F.initialMatch(S, L, SE);
+      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
+      VisitedLSRUse.insert(LUIdx);
+    }
+
     if (!LU.WidestFixupType ||
         SE.getTypeSizeInBits(LU.WidestFixupType) <
         SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -5162,6 +5183,20 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
              });
 
   assert(Solution.size() == Uses.size() && "Malformed solution!");
+
+  if (BaselineCost.isLess(SolutionCost)) {
+    LLVM_DEBUG(dbgs() << "The baseline solution requires ";
+               BaselineCost.print(dbgs()); dbgs() << "\n");
+    if (!AllowDropSolutionIfLessProfitable)
+      LLVM_DEBUG(
+          dbgs() << "Baseline is more profitable than chosen solution, "
+                    "add option 'lsr-drop-solution' to drop LSR solution.\n");
+    else {
+      LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
+                           "solution, dropping LSR solution.\n";);
+      Solution.clear();
+    }
+  }
 }
 
 /// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as
@@ -5706,7 +5741,8 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
       MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
                             ? PreferredAddresingMode
                             : TTI.getPreferredAddressingMode(L, &SE)),
-      Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false) {
+      Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false),
+      BaselineCost(L, SE, TTI, AMK) {
   // If LoopSimplify form is not available, stay out of trouble.
   if (!L->isLoopSimplifyForm())
     return;

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
new file mode 100644
index 0000000000000..37876a9071240
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
@@ -0,0 +1,55 @@
+; REQUIRES: asserts
+; RUN: llc < %s -O3 -mattr=+v -debug -lsr-drop-solution 2>&1 | FileCheck --check-prefix=DEBUG %s
+; RUN: llc < %s -O3 -mattr=+v -debug 2>&1 | FileCheck --check-prefix=DEBUG2 %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
+;DEBUG: The chosen solution requires 3 instructions 6 regs, with addrec cost 1, plus 2 base adds, plus 5 setup cost
+;DEBUG: The baseline solution requires 2 instructions 4 regs, with addrec cost 2, plus 3 setup cost
+;DEBUG: Baseline is more profitable than chosen solution, dropping LSR solution.
+
+;DEBUG2: Baseline is more profitable than chosen solution, add option 'lsr-drop-solution' to drop LSR solution.
+entry:
+  %0 = ptrtoint ptr %a0 to i64
+  %1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)
+  %cmp.not = icmp eq i64 %1, %a2
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                        ; preds = %entry
+  %add = add i64 %0, %a2
+  %sub = sub i64 %add, %1
+  br label %do.body
+
+do.body:                                        ; preds = %do.body, %if.then
+  %a3.0 = phi i64 [ %0, %if.then ], [ %add1, %do.body ]
+  %a1.addr.0 = phi ptr [ %a1, %if.then ], [ %add.ptr, %do.body ]
+  %2 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.0, i64 %1)
+  %3 = inttoptr i64 %a3.0 to ptr
+  tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %2, ptr %3, i64 %1)
+  %add1 = add i64 %a3.0, %1
+  %add.ptr = getelementptr i8, ptr %a1.addr.0, i64 %1
+  %cmp2 = icmp ugt i64 %sub, %add1
+  br i1 %cmp2, label %do.body, label %do.end
+
+do.end:                                         ; preds = %do.body
+  %sub4 = sub i64 %add, %add1
+  %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub4, i64 0, i64 3)
+  br label %if.end
+
+if.end:                                         ; preds = %do.end, %entry
+  %a3.1 = phi i64 [ %add1, %do.end ], [ %0, %entry ]
+  %t0.0 = phi i64 [ %4, %do.end ], [ %a2, %entry ]
+  %a1.addr.1 = phi ptr [ %add.ptr, %do.end ], [ %a1, %entry ]
+  %5 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.1, i64 %t0.0)
+  %6 = inttoptr i64 %a3.1 to ptr
+  tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %5, ptr %6, i64 %t0.0)
+  ret ptr %a0
+}
+
+declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
+
+declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)
+
+declare void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll
index 7818e6789c151..f62f14140509f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O3 -mattr=+v | FileCheck %s
+; RUN: llc < %s -O3 -mattr=+v -lsr-drop-solution | FileCheck --check-prefix=CHECK %s
 
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
 target triple = "riscv64-unknown-linux-gnu"
@@ -16,23 +16,20 @@ define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
 ; CHECK-NEXT:    vse8.v v8, (a3)
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: # %if.then
-; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    add a3, a0, a2
-; CHECK-NEXT:    sub a6, a3, a4
+; CHECK-NEXT:    add a2, a0, a2
+; CHECK-NEXT:    sub a5, a2, a4
+; CHECK-NEXT:    mv a3, a0
 ; CHECK-NEXT:  .LBB0_3: # %do.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    add a3, a1, a5
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a3)
-; CHECK-NEXT:    add a7, a0, a5
-; CHECK-NEXT:    add a5, a5, a4
-; CHECK-NEXT:    add a3, a0, a5
-; CHECK-NEXT:    vse8.v v8, (a7)
-; CHECK-NEXT:    bltu a3, a6, .LBB0_3
+; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    vse8.v v8, (a3)
+; CHECK-NEXT:    add a3, a3, a4
+; CHECK-NEXT:    add a1, a1, a4
+; CHECK-NEXT:    bltu a3, a5, .LBB0_3
 ; CHECK-NEXT:  # %bb.4: # %do.end
-; CHECK-NEXT:    sub a2, a2, a5
+; CHECK-NEXT:    sub a2, a2, a3
 ; CHECK-NEXT:    vsetvli a2, a2, e8, m8, ta, mu
-; CHECK-NEXT:    add a1, a1, a5
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
 ; CHECK-NEXT:    vse8.v v8, (a3)


        


More information about the llvm-commits mailing list