[llvm] [Passes] Run IndVars after late partial unrolling. (PR #81275)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 9 08:51:40 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

After aea2a1473631, partial unrolling now happens much later in the pipeline. At this point, we miss a number of simplification passes to further optimize the partially unrolled loop.

In some cases, this can cause notable performance regressions.

To improve codegen for partially unrolled loop bodies, run IndVars on demand on partially unrolled loops.

This patch uses an approach similar to the way extra cleanup passes are run on demand after vectorization (added in acea6e9cfa4c4a0e8678c7). If we decide to make wider use of this pattern, we should probably factor out the pass manager/analysis logic before landing this.

Another instance that can be improved by this pattern: https://github.com/llvm/llvm-project/pull/81271

Compile-time impact (geomeans) ranges from +0.11% to +0.19%. https://llvm-compile-time-tracker.com/compare.php?from=68cef34784e33539b75f2c379d8e21a87719d67c&to=446d5e5b2fbfb5efc6cbc40584a31564327df584&stat=instructions:u

This could be reduced by running only a small subset of IndVars to optimize inductions; in that case the impact ranges from +0.05% to +0.10%. https://llvm-compile-time-tracker.com/compare.php?from=68cef34784e33539b75f2c379d8e21a87719d67c&to=bbee5fa003136618446dc30e6ac3897ab03facd7&stat=instructions%3Au

---
Full diff: https://github.com/llvm/llvm-project/pull/81275.diff


6 Files Affected:

- (modified) llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h (+34) 
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+6) 
- (modified) llvm/lib/Passes/PassRegistry.def (+3) 
- (modified) llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp (+8-1) 
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+2) 
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll (+2-4) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index d09fc328c452ff..e6e9e6065fed84 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -56,8 +56,10 @@
 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
 
+#include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include <functional>
 
 namespace llvm {
@@ -111,6 +113,38 @@ struct ExtraVectorPassManager : public FunctionPassManager {
   }
 };
 
+struct ShouldRunExtraUnrollPasses
+    : public AnalysisInfoMixin<ShouldRunExtraUnrollPasses> {
+  static AnalysisKey Key;
+  struct Result {
+    SmallPtrSet<Loop *, 4> Loops;
+    bool invalidate(Function &F, const PreservedAnalyses &PA,
+                    FunctionAnalysisManager::Invalidator &) {
+      // Check whether the analysis has been explicitly invalidated. Otherwise,
+      // it remains preserved.
+      auto PAC = PA.getChecker<ShouldRunExtraUnrollPasses>();
+      return !PAC.preservedWhenStateless();
+    }
+  };
+
+  Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); }
+};
+
+template <typename MarkerT>
+struct ExtraLoopPassManager : public LoopPassManager {
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U) {
+    auto PA = PreservedAnalyses::all();
+    if (auto *X = AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
+                      .getCachedResult<ShouldRunExtraUnrollPasses>(
+                          *L.getHeader()->getParent()))
+      if (X->Loops.contains(&L))
+        PA.intersect(LoopPassManager::run(L, AM, AR, U));
+    // PA.abandon<MarkerT>();
+    return PA;
+  }
+};
+
 struct LoopVectorizeOptions {
   /// If false, consider all loops for interleaving.
   /// If true, only loops that explicitly request interleaving are considered.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6ede8638291206..cb5c88521b2bd7 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1272,6 +1272,12 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
     FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
         PTO.ForgetAllSCEVInLoopUnroll)));
+    {
+      ExtraLoopPassManager<ShouldRunExtraUnrollPasses> ExtraPasses;
+      ExtraPasses.addPass((IndVarSimplifyPass()));
+      FPM.addPass(createFunctionToLoopPassAdaptor(std::move(ExtraPasses)));
+    }
+
     FPM.addPass(WarnMissedTransformationsPass());
     // Now that we are done with loop unrolling, be it either by LoopVectorizer,
     // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 6cb87fba426463..42d70f7273565d 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -270,6 +270,9 @@ FUNCTION_ANALYSIS("should-not-run-function-passes",
                   ShouldNotRunFunctionPassesAnalysis())
 FUNCTION_ANALYSIS("should-run-extra-vector-passes",
                   ShouldRunExtraVectorPasses())
+FUNCTION_ANALYSIS("should-run-extra-unroll-passes",
+                  ShouldRunExtraUnrollPasses())
+
 FUNCTION_ANALYSIS("ssp-layout", SSPLayoutAnalysis())
 FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
 FUNCTION_ANALYSIS("target-ir",
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 75fb8765061edf..6c01271689c8b4 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -58,6 +58,7 @@
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -1601,6 +1602,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
   SmallPriorityWorklist<Loop *, 4> Worklist;
   appendLoopsToWorklist(LI, Worklist);
 
+  auto PA = getLoopPassPreservedAnalyses();
   while (!Worklist.empty()) {
     // Because the LoopInfo stores the loops in RPO, we walk the worklist
     // from back to front so that we work forward across the CFG, which
@@ -1629,6 +1631,11 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
         UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
         UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
     Changed |= Result != LoopUnrollResult::Unmodified;
+    if (Result == LoopUnrollResult::PartiallyUnrolled) {
+      auto &E = AM.getResult<ShouldRunExtraUnrollPasses>(F);
+      E.Loops.insert(&L);
+      PA.preserve<ShouldRunExtraUnrollPasses>();
+    }
 
     // The parent must not be damaged by unrolling!
 #ifndef NDEBUG
@@ -1644,7 +1651,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
   if (!Changed)
     return PreservedAnalyses::all();
 
-  return getLoopPassPreservedAnalyses();
+  return PA;
 }
 
 void LoopUnrollPass::printPipeline(
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1a7b301c35f2b8..5b3503066242f5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -487,6 +487,8 @@ namespace llvm {
 
 AnalysisKey ShouldRunExtraVectorPasses::Key;
 
+AnalysisKey ShouldRunExtraUnrollPasses::Key;
+
 /// InnerLoopVectorizer vectorizes loops which contain only one basic
 /// block to a specified vectorization factor (VF).
 /// This class performs the widening of scalars into vectors, or multiple
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll
index 6132c35c96ca32..da11888e87ed6d 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll
@@ -20,7 +20,6 @@ define void @partial_unroll_forced(i32 %N, ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT:    br label [[LOOP_LATCH:%.*]]
 ; CHECK:       loop.latch:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[LOOP_LATCH_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[LOOP_LATCH]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[LOOP_LATCH_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP_LATCH]] ]
 ; CHECK-NEXT:    [[SRC_IDX:%.*]] = getelementptr <8 x half>, ptr [[SRC]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[L:%.*]] = load <8 x half>, ptr [[SRC_IDX]], align 16
 ; CHECK-NEXT:    [[DST_IDX:%.*]] = getelementptr <8 x half>, ptr [[DST]], i64 [[INDVARS_IV]]
@@ -32,9 +31,8 @@ define void @partial_unroll_forced(i32 %N, ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT:    [[DST_IDX_1:%.*]] = getelementptr <8 x half>, ptr [[DST]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[ADD_1:%.*]] = fadd <8 x half> [[L_1]], [[L_1]]
 ; CHECK-NEXT:    store <8 x half> [[ADD_1]], ptr [[DST_IDX_1]], align 16
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
-; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_1]] = add i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_LOOPEXIT_UNR_LCSSA]], label [[LOOP_LATCH]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       exit.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[LOOP_LATCH_PREHEADER]] ], [ [[INDVARS_IV_NEXT_1]], [[LOOP_LATCH]] ]

``````````

</details>


https://github.com/llvm/llvm-project/pull/81275


More information about the llvm-commits mailing list