[llvm] [Passes] Run SimpleLoopUnswitch after introducing invariant branches. (PR #81271)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 08:41:18 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
IndVars may be able to replace a loop dependent condition with a loop invariant one, but loop-unswitch runs before IndVars, so the invariant check remains in the loop.
For an example, consider a read-only loop with a bounds check: https://godbolt.org/z/8cdj4qhbG
This patch uses a approach similar to the way extra cleanup passes are run on demand after vectorization (added in acea6e9cfa4c4a0e8678c7).
It introduces a new ShouldRunExtraSimpleLoopUnswitch analysis marker, which IndVars can use to indicate that extra unswitching is beneficial.
ExtraSimpleLoopUnswitchPassManager uses this analysis to determine whether to run its passes on a loop.
Compile-time impact (geomean) ranges from +0.0% to 0.02% https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=19e6e99eeb280d426907ea73a21b139ba7225627&stat=instructions%3Au
Compile-time impact (geomean) of unconditionally running SimpleLoopUnswitch ranges from +0.05% - +0.16%
https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=2930dfd5accdce2e6f8d5146ae4d626add2065a2&stat=instructions:u
Unconditionally running SimpleLoopUnswitch seems to indicate that there are multiple other scenarios where we fail to run unswitching when opportunities remain.
---
Full diff: https://github.com/llvm/llvm-project/pull/81271.diff
8 Files Affected:
- (modified) llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h (+35)
- (modified) llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h (+8-4)
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+7)
- (modified) llvm/lib/Passes/PassRegistry.def (+3)
- (modified) llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (+15-2)
- (modified) llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp (+1)
- (modified) llvm/lib/Transforms/Utils/SimplifyIndVar.cpp (+11-6)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll (+43-11)
``````````diff
diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
index b97ee23fc0e65d..a40b0a2a9ab72f 100644
--- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
+++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
@@ -20,6 +21,40 @@ class Loop;
class StringRef;
class raw_ostream;
+struct ShouldRunExtraSimpleLoopUnswitch
+ : public AnalysisInfoMixin<ShouldRunExtraSimpleLoopUnswitch> {
+ static AnalysisKey Key;
+ struct Result {
+ bool invalidate(Loop &L, const PreservedAnalyses &PA,
+ LoopAnalysisManager::Invalidator &) {
+ // Check whether the analysis has been explicitly invalidated. Otherwise,
+ // it remains preserved.
+ auto PAC = PA.getChecker<ShouldRunExtraSimpleLoopUnswitch>();
+ return !PAC.preservedWhenStateless();
+ }
+ };
+
+ Result run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR) {
+ return Result();
+ }
+
+ static bool isRequired() { return true; }
+};
+
+struct ExtraSimpleLoopUnswitchPassManager : public LoopPassManager {
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U) {
+ auto PA = PreservedAnalyses::all();
+ if (AM.getCachedResult<ShouldRunExtraSimpleLoopUnswitch>(L))
+ PA.intersect(LoopPassManager::run(L, AM, AR, U));
+ PA.abandon<ShouldRunExtraSimpleLoopUnswitch>();
+ return PA;
+ }
+
+ static bool isRequired() { return true; }
+};
+
/// This pass transforms loops that contain branches or switches on loop-
/// invariant conditions to have multiple loops. For example, it turns the left
/// into the right code:
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
index ff60811b616859..b39ca05f3f4b14 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -15,6 +15,8 @@
#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
#define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
+#include <utility>
+
namespace llvm {
class Type;
@@ -47,10 +49,12 @@ class IVVisitor {
/// simplifyUsersOfIV - Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, const TargetTransformInfo *TTI,
- SmallVectorImpl<WeakTrackingVH> &Dead,
- SCEVExpander &Rewriter, IVVisitor *V = nullptr);
+std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
+ DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ SmallVectorImpl<WeakTrackingVH> &Dead,
+ SCEVExpander &Rewriter,
+ IVVisitor *V = nullptr);
/// SimplifyLoopIVs - Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6ede8638291206..8187d78d41e928 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -627,6 +627,13 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(IndVarSimplifyPass());
+ {
+ ExtraSimpleLoopUnswitchPassManager ExtraPasses;
+ ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
+ OptimizationLevel::O3));
+ LPM2.addPass(std::move(ExtraPasses));
+ }
+
invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
LPM2.addPass(LoopDeletionPass());
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 6cb87fba426463..4ac2a6b48df220 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -583,6 +583,9 @@ LOOP_ANALYSIS("ddg", DDGAnalysis())
LOOP_ANALYSIS("iv-users", IVUsersAnalysis())
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch",
+ ShouldRunExtraSimpleLoopUnswitch())
+
#undef LOOP_ANALYSIS
#ifndef LOOP_PASS
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 41c4d623617347..b2905109af327e 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -70,6 +70,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -137,6 +138,8 @@ class IndVarSimplify {
SmallVector<WeakTrackingVH, 16> DeadInsts;
bool WidenIndVars;
+ bool RunUnswitching = false;
+
bool handleFloatingPointIV(Loop *L, PHINode *PH);
bool rewriteNonIntegerIVs(Loop *L);
@@ -170,6 +173,8 @@ class IndVarSimplify {
}
bool run(Loop *L);
+
+ bool runUnswitching() const { return RunUnswitching; }
};
} // end anonymous namespace
@@ -614,9 +619,11 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
// Information about sign/zero extensions of CurrIV.
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
- Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
- &Visitor);
+ const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
+ Rewriter, &Visitor);
+ Changed |= C;
+ RunUnswitching |= U;
if (Visitor.WI.WidestNativeType) {
WideIVs.push_back(Visitor.WI);
}
@@ -1873,6 +1880,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (OldCond->use_empty())
DeadInsts.emplace_back(OldCond);
Changed = true;
+ RunUnswitching = true;
}
return Changed;
@@ -2058,6 +2066,11 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
+ if (IVS.runUnswitching()) {
+ AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
+ PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
+ }
+
if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 7eb0ba1c2c1793..2bfee6e4c0e421 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -133,6 +133,7 @@ static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
"not-taken 1/<this option> times or less."),
cl::init(16));
+AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
namespace {
struct CompareDesc {
BranchInst *Term;
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 1b142f14d81139..07949094431919 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -59,6 +59,7 @@ namespace {
SmallVectorImpl<WeakTrackingVH> &DeadInsts;
bool Changed = false;
+ bool RunUnswitching = false;
public:
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
@@ -71,6 +72,7 @@ namespace {
}
bool hasChanged() const { return Changed; }
+ bool runUnswitching() const { return RunUnswitching; }
/// Iteratively perform simplification on a worklist of users of the
/// specified induction variable. This is the top-level driver that applies
@@ -232,6 +234,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
ICmp->setPredicate(InvariantPredicate);
ICmp->setOperand(0, NewLHS);
ICmp->setOperand(1, NewRHS);
+ RunUnswitching = true;
return true;
}
@@ -981,14 +984,15 @@ void IVVisitor::anchor() { }
/// Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, const TargetTransformInfo *TTI,
- SmallVectorImpl<WeakTrackingVH> &Dead,
- SCEVExpander &Rewriter, IVVisitor *V) {
+std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
+ DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ SmallVectorImpl<WeakTrackingVH> &Dead,
+ SCEVExpander &Rewriter, IVVisitor *V) {
SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
Rewriter, Dead);
SIV.simplifyUsers(CurrIV, V);
- return SIV.hasChanged();
+ return {SIV.hasChanged(), SIV.runUnswitching()};
}
/// Simplify users of induction variables within this
@@ -1002,8 +1006,9 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
#endif
bool Changed = false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- Changed |=
+ const auto &[C, _] =
simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
+ Changed |= C;
}
return Changed;
}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
index c6c9a52167d54e..ddb8e6133ebf63 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
@@ -14,24 +14,50 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i32 [[TMP1]], [[COUNT]]
+; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[FOR_BODY_PREHEADER10:%.*]], label [[IF_THEN:%.*]]
+; CHECK: for.body.preheader10:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER13:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967288
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[WIDE_LOAD12]], [[VEC_PHI11]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]]
+; CHECK: for.body.preheader13:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
-; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD]], [[IF_END]] ]
-; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: tail call void @llvm.trap()
-; CHECK-NEXT: unreachable
-; CHECK: if.end:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER13]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[SUM_07_PH]], [[FOR_BODY_PREHEADER13]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP2]], [[SUM_07]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP8]], [[SUM_07]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: if.then:
+; CHECK-NEXT: tail call void @llvm.trap()
+; CHECK-NEXT: unreachable
;
entry:
%array.addr = alloca ptr, align 8
@@ -96,3 +122,9 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
declare void @llvm.trap()
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
``````````
</details>
https://github.com/llvm/llvm-project/pull/81271
More information about the llvm-commits
mailing list