[llvm] [Passes] Run SimpleLoopUnswitch after introducing invariant branches. (PR #81271)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 9 08:41:18 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

IndVars may be able to replace a loop dependent condition with a loop invariant one, but loop-unswitch runs before IndVars, so the invariant check remains in the loop.

For an example, consider a read-only loop with a bounds check: https://godbolt.org/z/8cdj4qhbG

This patch uses a approach similar to the way extra cleanup passes are run on demand after vectorization (added in acea6e9cfa4c4a0e8678c7).

It introduces a new ShouldRunExtraSimpleLoopUnswitch analysis marker, which IndVars can use to indicate that extra unswitching is beneficial.

ExtraSimpleLoopUnswitchPassManager uses this analysis to determine whether to run its passes on a loop.

Compile-time impact (geomean) ranges from +0.0% to 0.02% https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=19e6e99eeb280d426907ea73a21b139ba7225627&stat=instructions%3Au

Compile-time impact (geomean) of unconditionally running SimpleLoopUnswitch ranges from +0.05% - +0.16%
https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=2930dfd5accdce2e6f8d5146ae4d626add2065a2&stat=instructions:u

Unconditionally running SimpleLoopUnswitch seems to indicate that there are multiple other scenarios where we fail to run unswitching when opportunities remain.

---
Full diff: https://github.com/llvm/llvm-project/pull/81271.diff


8 Files Affected:

- (modified) llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h (+35) 
- (modified) llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h (+8-4) 
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+7) 
- (modified) llvm/lib/Passes/PassRegistry.def (+3) 
- (modified) llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (+15-2) 
- (modified) llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp (+1) 
- (modified) llvm/lib/Transforms/Utils/SimplifyIndVar.cpp (+11-6) 
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll (+43-11) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
index b97ee23fc0e65d..a40b0a2a9ab72f 100644
--- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
+++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -12,6 +12,7 @@
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
 
 namespace llvm {
 
@@ -20,6 +21,40 @@ class Loop;
 class StringRef;
 class raw_ostream;
 
+struct ShouldRunExtraSimpleLoopUnswitch
+    : public AnalysisInfoMixin<ShouldRunExtraSimpleLoopUnswitch> {
+  static AnalysisKey Key;
+  struct Result {
+    bool invalidate(Loop &L, const PreservedAnalyses &PA,
+                    LoopAnalysisManager::Invalidator &) {
+      // Check whether the analysis has been explicitly invalidated. Otherwise,
+      // it remains preserved.
+      auto PAC = PA.getChecker<ShouldRunExtraSimpleLoopUnswitch>();
+      return !PAC.preservedWhenStateless();
+    }
+  };
+
+  Result run(Loop &L, LoopAnalysisManager &AM,
+             LoopStandardAnalysisResults &AR) {
+    return Result();
+  }
+
+  static bool isRequired() { return true; }
+};
+
+struct ExtraSimpleLoopUnswitchPassManager : public LoopPassManager {
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U) {
+    auto PA = PreservedAnalyses::all();
+    if (AM.getCachedResult<ShouldRunExtraSimpleLoopUnswitch>(L))
+      PA.intersect(LoopPassManager::run(L, AM, AR, U));
+    PA.abandon<ShouldRunExtraSimpleLoopUnswitch>();
+    return PA;
+  }
+
+  static bool isRequired() { return true; }
+};
+
 /// This pass transforms loops that contain branches or switches on loop-
 /// invariant conditions to have multiple loops. For example, it turns the left
 /// into the right code:
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
index ff60811b616859..b39ca05f3f4b14 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -15,6 +15,8 @@
 #ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
 #define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
 
+#include <utility>
+
 namespace llvm {
 
 class Type;
@@ -47,10 +49,12 @@ class IVVisitor {
 
 /// simplifyUsersOfIV - Simplify instructions that use this induction variable
 /// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
-                       LoopInfo *LI, const TargetTransformInfo *TTI,
-                       SmallVectorImpl<WeakTrackingVH> &Dead,
-                       SCEVExpander &Rewriter, IVVisitor *V = nullptr);
+std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
+                                        DominatorTree *DT, LoopInfo *LI,
+                                        const TargetTransformInfo *TTI,
+                                        SmallVectorImpl<WeakTrackingVH> &Dead,
+                                        SCEVExpander &Rewriter,
+                                        IVVisitor *V = nullptr);
 
 /// SimplifyLoopIVs - Simplify users of induction variables within this
 /// loop. This does not actually change or add IVs.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6ede8638291206..8187d78d41e928 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -627,6 +627,13 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   LPM2.addPass(LoopIdiomRecognizePass());
   LPM2.addPass(IndVarSimplifyPass());
 
+  {
+    ExtraSimpleLoopUnswitchPassManager ExtraPasses;
+    ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
+                                               OptimizationLevel::O3));
+    LPM2.addPass(std::move(ExtraPasses));
+  }
+
   invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
 
   LPM2.addPass(LoopDeletionPass());
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 6cb87fba426463..4ac2a6b48df220 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -583,6 +583,9 @@ LOOP_ANALYSIS("ddg", DDGAnalysis())
 LOOP_ANALYSIS("iv-users", IVUsersAnalysis())
 LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
 LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch",
+                  ShouldRunExtraSimpleLoopUnswitch())
+
 #undef LOOP_ANALYSIS
 
 #ifndef LOOP_PASS
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 41c4d623617347..b2905109af327e 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -70,6 +70,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
@@ -137,6 +138,8 @@ class IndVarSimplify {
   SmallVector<WeakTrackingVH, 16> DeadInsts;
   bool WidenIndVars;
 
+  bool RunUnswitching = false;
+
   bool handleFloatingPointIV(Loop *L, PHINode *PH);
   bool rewriteNonIntegerIVs(Loop *L);
 
@@ -170,6 +173,8 @@ class IndVarSimplify {
   }
 
   bool run(Loop *L);
+
+  bool runUnswitching() const { return RunUnswitching; }
 };
 
 } // end anonymous namespace
@@ -614,9 +619,11 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
       // Information about sign/zero extensions of CurrIV.
       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
 
-      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
-                                   &Visitor);
+      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
+                                             Rewriter, &Visitor);
 
+      Changed |= C;
+      RunUnswitching |= U;
       if (Visitor.WI.WidestNativeType) {
         WideIVs.push_back(Visitor.WI);
       }
@@ -1873,6 +1880,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
     if (OldCond->use_empty())
       DeadInsts.emplace_back(OldCond);
     Changed = true;
+    RunUnswitching = true;
   }
 
   return Changed;
@@ -2058,6 +2066,11 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
 
   auto PA = getLoopPassPreservedAnalyses();
   PA.preserveSet<CFGAnalyses>();
+  if (IVS.runUnswitching()) {
+    AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
+    PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
+  }
+
   if (AR.MSSA)
     PA.preserve<MemorySSAAnalysis>();
   return PA;
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 7eb0ba1c2c1793..2bfee6e4c0e421 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -133,6 +133,7 @@ static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
                          "not-taken 1/<this option> times or less."),
     cl::init(16));
 
+AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
 namespace {
 struct CompareDesc {
   BranchInst *Term;
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 1b142f14d81139..07949094431919 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -59,6 +59,7 @@ namespace {
     SmallVectorImpl<WeakTrackingVH> &DeadInsts;
 
     bool Changed = false;
+    bool RunUnswitching = false;
 
   public:
     SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
@@ -71,6 +72,7 @@ namespace {
     }
 
     bool hasChanged() const { return Changed; }
+    bool runUnswitching() const { return RunUnswitching; }
 
     /// Iteratively perform simplification on a worklist of users of the
     /// specified induction variable. This is the top-level driver that applies
@@ -232,6 +234,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
   ICmp->setPredicate(InvariantPredicate);
   ICmp->setOperand(0, NewLHS);
   ICmp->setOperand(1, NewRHS);
+  RunUnswitching = true;
   return true;
 }
 
@@ -981,14 +984,15 @@ void IVVisitor::anchor() { }
 
 /// Simplify instructions that use this induction variable
 /// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
-                       LoopInfo *LI, const TargetTransformInfo *TTI,
-                       SmallVectorImpl<WeakTrackingVH> &Dead,
-                       SCEVExpander &Rewriter, IVVisitor *V) {
+std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
+                                        DominatorTree *DT, LoopInfo *LI,
+                                        const TargetTransformInfo *TTI,
+                                        SmallVectorImpl<WeakTrackingVH> &Dead,
+                                        SCEVExpander &Rewriter, IVVisitor *V) {
   SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
                      Rewriter, Dead);
   SIV.simplifyUsers(CurrIV, V);
-  return SIV.hasChanged();
+  return {SIV.hasChanged(), SIV.runUnswitching()};
 }
 
 /// Simplify users of induction variables within this
@@ -1002,8 +1006,9 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
 #endif
   bool Changed = false;
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
-    Changed |=
+    const auto &[C, _] =
         simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
+    Changed |= C;
   }
   return Changed;
 }
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
index c6c9a52167d54e..ddb8e6133ebf63 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
@@ -14,24 +14,50 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
 ; CHECK-NEXT:    [[DOTNOT_NOT:%.*]] = icmp ult i32 [[TMP1]], [[COUNT]]
+; CHECK-NEXT:    br i1 [[DOTNOT_NOT]], label [[FOR_BODY_PREHEADER10:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       for.body.preheader10:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER13:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967288
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI11:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP5]] = add <4 x i32> [[WIDE_LOAD12]], [[VEC_PHI11]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]]
+; CHECK:       for.body.preheader13:
+; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
-; CHECK-NEXT:    [[SUM_07:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD]], [[IF_END]] ]
-; CHECK-NEXT:    br i1 [[DOTNOT_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @llvm.trap()
-; CHECK-NEXT:    unreachable
-; CHECK:       if.end:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER13]] ]
+; CHECK-NEXT:    [[SUM_07:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[SUM_07_PH]], [[FOR_BODY_PREHEADER13]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ADD]] = add nsw i32 [[TMP2]], [[SUM_07]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[TMP8]], [[SUM_07]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
 ;
 entry:
   %array.addr = alloca ptr, align 8
@@ -96,3 +122,9 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
 declare void @llvm.trap()
 
 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.

``````````

</details>


https://github.com/llvm/llvm-project/pull/81271


More information about the llvm-commits mailing list