[llvm] [LSR] Split the -lsr-term-fold transformation into its own pass (PR #104234)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 17 18:29:16 PDT 2024
https://github.com/preames updated https://github.com/llvm/llvm-project/pull/104234
>From 761c48f9f556c1ef314ef0b1543fb4b6ad519357 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Wed, 14 Aug 2024 08:59:09 -0700
Subject: [PATCH 1/5] [LSR] Split the -lsr-term-fold transformation into its
own pass
This transformation doesn't actually use any of the internal state of
LSR and recomputes all information from SCEV. Splitting it out makes
it easier to test.
Note that in the long term I would like to write a version of this transform
which *is* integrated with LSR's solver, but if that happens, we'll
just delete the extra pass.
Integration-wise, I switched from using TTI to using a pass configuration
variable. This seems slightly more idiomatic, and means we don't run
the extra logic on any target other than RISCV.
---
.../llvm/Analysis/TargetTransformInfo.h | 9 -
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 -
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 -
llvm/include/llvm/CodeGen/Passes.h | 3 +
llvm/include/llvm/InitializePasses.h | 1 +
llvm/include/llvm/LinkAllPasses.h | 1 +
.../llvm/Passes/MachinePassRegistry.def | 1 +
llvm/include/llvm/Transforms/Scalar.h | 8 +
.../llvm/Transforms/Scalar/LoopTermFold.h | 30 ++
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 -
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassRegistry.def | 1 +
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +
.../Target/RISCV/RISCVTargetTransformInfo.h | 4 -
llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 +
.../Transforms/Scalar/LoopStrengthReduce.cpp | 265 +-----------
llvm/lib/Transforms/Scalar/LoopTermFold.cpp | 387 ++++++++++++++++++
llvm/lib/Transforms/Scalar/Scalar.cpp | 1 +
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 +
.../RISCV/lsr-cost-compare.ll | 2 +-
.../RISCV/term-fold-crash.ll | 2 +-
.../lsr-term-fold-negative-testcase.ll | 2 +-
.../LoopStrengthReduce/lsr-term-fold.ll | 2 +-
.../lsr-unreachable-bb-phi-node.ll | 5 +-
24 files changed, 448 insertions(+), 292 deletions(-)
create mode 100644 llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
create mode 100644 llvm/lib/Transforms/Scalar/LoopTermFold.cpp
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 38e8b9da213974..2c998f3e3a73cd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -742,11 +742,6 @@ class TargetTransformInfo {
/// cost should return false, otherwise return true.
bool isNumRegsMajorCostOfLSR() const;
- /// Return true if LSR should attempts to replace a use of an otherwise dead
- /// primary IV in the latch condition with another IV available in the loop.
- /// When successful, makes the primary IV dead.
- bool shouldFoldTerminatingConditionAfterLSR() const;
-
/// Return true if LSR should drop a found solution if it's calculated to be
/// less profitable than the baseline.
bool shouldDropLSRSolutionIfLessProfitable() const;
@@ -1891,7 +1886,6 @@ class TargetTransformInfo::Concept {
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) = 0;
virtual bool isNumRegsMajorCostOfLSR() = 0;
- virtual bool shouldFoldTerminatingConditionAfterLSR() const = 0;
virtual bool shouldDropLSRSolutionIfLessProfitable() const = 0;
virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
virtual bool canMacroFuseCmp() = 0;
@@ -2370,9 +2364,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
bool isNumRegsMajorCostOfLSR() override {
return Impl.isNumRegsMajorCostOfLSR();
}
- bool shouldFoldTerminatingConditionAfterLSR() const override {
- return Impl.shouldFoldTerminatingConditionAfterLSR();
- }
bool shouldDropLSRSolutionIfLessProfitable() const override {
return Impl.shouldDropLSRSolutionIfLessProfitable();
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d208a710bb27fd..11b07ac0b7fc47 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -244,8 +244,6 @@ class TargetTransformInfoImplBase {
bool isNumRegsMajorCostOfLSR() const { return true; }
- bool shouldFoldTerminatingConditionAfterLSR() const { return false; }
-
bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
bool isProfitableLSRChainElement(Instruction *I) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 279cfb5aa47d6f..a57bd876561b60 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -394,11 +394,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
}
- bool shouldFoldTerminatingConditionAfterLSR() const {
- return TargetTransformInfoImplBase::
- shouldFoldTerminatingConditionAfterLSR();
- }
-
bool shouldDropLSRSolutionIfLessProfitable() const {
return TargetTransformInfoImplBase::shouldDropLSRSolutionIfLessProfitable();
}
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 20273d069bf053..db405a9e571c29 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -201,6 +201,9 @@ namespace llvm {
// register allocator.
extern char &InitUndefID;
+ extern char &LoopStrengthReduceID;
+ extern char &LoopTermFoldID;
+
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 261912aab3076c..3bf1840cf9a0fe 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -168,6 +168,7 @@ void initializeLoopInfoWrapperPassPass(PassRegistry&);
void initializeLoopPassPass(PassRegistry&);
void initializeLoopSimplifyPass(PassRegistry&);
void initializeLoopStrengthReducePass(PassRegistry&);
+void initializeLoopTermFoldPass(PassRegistry&);
void initializeLoopUnrollPass(PassRegistry&);
void initializeLowerAtomicLegacyPassPass(PassRegistry&);
void initializeLowerConstantIntrinsicsPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index c00e425b131987..b490e816f740ca 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -88,6 +88,7 @@ namespace {
(void) llvm::createLoopExtractorPass();
(void) llvm::createLoopSimplifyPass();
(void) llvm::createLoopStrengthReducePass();
+ (void) llvm::createLoopTermFoldPass();
(void)llvm::createLoopUnrollPass();
(void) llvm::createLowerGlobalDtorsLegacyPass();
(void) llvm::createLowerInvokePass();
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 8e669ee5791239..05baf514fa7210 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -79,6 +79,7 @@ FUNCTION_PASS("win-eh-prepare", WinEHPreparePass())
#define LOOP_PASS(NAME, CREATE_PASS)
#endif
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
+LOOP_PASS("loop-term-fold", LoopTermFoldPass())
#undef LOOP_PASS
#ifndef MACHINE_MODULE_PASS
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 98d0adca355214..17f4327eb3e1ab 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -51,6 +51,14 @@ Pass *createLICMPass();
//
Pass *createLoopStrengthReducePass();
+//===----------------------------------------------------------------------===//
+//
+// LoopTermFold - This pass attempts to eliminate the last use of an IV in
+// a loop terminator instruction by rewriting it in terms of another IV.
+// Expected to be run immediately after LSR.
+//
+Pass *createLoopTermFoldPass();
+
//===----------------------------------------------------------------------===//
//
// LoopUnroll - This pass is a simple loop unrolling pass.
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h b/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
new file mode 100644
index 00000000000000..92c3ed35152756
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
@@ -0,0 +1,30 @@
+//===- LoopTermFold.h - Loop Term Fold Pass ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Loop;
+class LPMUpdater;
+
+class LoopTermFoldPass : public PassInfoMixin<LoopTermFoldPass> {
+public:
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index dcde78925bfa98..2c26493bd3f1ca 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -427,10 +427,6 @@ bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const {
return TTIImpl->isNumRegsMajorCostOfLSR();
}
-bool TargetTransformInfo::shouldFoldTerminatingConditionAfterLSR() const {
- return TTIImpl->shouldFoldTerminatingConditionAfterLSR();
-}
-
bool TargetTransformInfo::shouldDropLSRSolutionIfLessProfitable() const {
return TTIImpl->shouldDropLSRSolutionIfLessProfitable();
}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 1859fde279c98d..2ba9e14b91c098 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -248,6 +248,7 @@
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LoopTermFold.h"
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 5ef8ba30944470..7cd944f3cee0b6 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -641,6 +641,7 @@ LOOP_PASS("loop-idiom-vectorize", LoopIdiomVectorizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
LOOP_PASS("loop-predication", LoopPredicationPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
+LOOP_PASS("loop-term-fold", LoopTermFoldPass())
LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b6884321f08411..6f37270c0fb737 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -427,8 +427,10 @@ void RISCVPassConfig::addIRPasses() {
addPass(createInterleavedAccessPass());
addPass(createRISCVCodeGenPreparePass());
}
+ insertPass(&LoopStrengthReduceID, &LoopTermFoldID);
TargetPassConfig::addIRPasses();
+
}
bool RISCVPassConfig::addPreISel() {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9c37a4f6ec2d04..b76c77fbee389d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -394,10 +394,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2);
- bool shouldFoldTerminatingConditionAfterLSR() const {
- return true;
- }
-
std::optional<unsigned> getMinPageSize() const { return 4096; }
};
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index ba09ebf8b04c4c..939a1457239567 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_component_library(LLVMScalarOpts
LoopRotation.cpp
LoopSimplifyCFG.cpp
LoopStrengthReduce.cpp
+ LoopTermFold.cpp
LoopUnrollPass.cpp
LoopUnrollAndJamPass.cpp
LoopVersioningLICM.cpp
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 91461d1ed27592..ad49dc9e3dccf9 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -81,6 +81,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -189,10 +190,6 @@ static cl::opt<unsigned> SetupCostDepthLimit(
"lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
cl::desc("The limit on recursion depth for LSRs setup cost"));
-static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
- "lsr-term-fold", cl::Hidden,
- cl::desc("Attempt to replace primary IV with other IV."));
-
static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
"lsr-drop-solution", cl::Hidden,
cl::desc("Attempt to drop solution if it is less profitable"));
@@ -205,9 +202,6 @@ static cl::opt<bool> DropScaledForVScale(
"lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
cl::desc("Avoid using scaled registers with vscale-relative addressing"));
-STATISTIC(NumTermFold,
- "Number of terminating condition fold recognized and performed");
-
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -7062,186 +7056,6 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
return nullptr;
}
-static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
-canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
- const LoopInfo &LI, const TargetTransformInfo &TTI) {
- if (!L->isInnermost()) {
- LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
- return std::nullopt;
- }
- // Only inspect on simple loop structure
- if (!L->isLoopSimplifyForm()) {
- LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
- return std::nullopt;
- }
-
- if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
- LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
- return std::nullopt;
- }
-
- BasicBlock *LoopLatch = L->getLoopLatch();
- BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
- if (!BI || BI->isUnconditional())
- return std::nullopt;
- auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!TermCond) {
- LLVM_DEBUG(
- dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
- return std::nullopt;
- }
- if (!TermCond->hasOneUse()) {
- LLVM_DEBUG(
- dbgs()
- << "Cannot replace terminating condition with more than one use\n");
- return std::nullopt;
- }
-
- BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
- Value *RHS = TermCond->getOperand(1);
- if (!LHS || !L->isLoopInvariant(RHS))
- // We could pattern match the inverse form of the icmp, but that is
- // non-canonical, and this pass is running *very* late in the pipeline.
- return std::nullopt;
-
- // Find the IV used by the current exit condition.
- PHINode *ToFold;
- Value *ToFoldStart, *ToFoldStep;
- if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
- return std::nullopt;
-
- // Ensure the simple recurrence is a part of the current loop.
- if (ToFold->getParent() != L->getHeader())
- return std::nullopt;
-
- // If that IV isn't dead after we rewrite the exit condition in terms of
- // another IV, there's no point in doing the transform.
- if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
- return std::nullopt;
-
- // Inserting instructions in the preheader has a runtime cost, scale
- // the allowed cost with the loops trip count as best we can.
- const unsigned ExpansionBudget = [&]() {
- unsigned Budget = 2 * SCEVCheapExpansionBudget;
- if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
- return std::min(Budget, SmallTC);
- if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
- return std::min(Budget, *SmallTC);
- // Unknown trip count, assume long running by default.
- return Budget;
- }();
-
- const SCEV *BECount = SE.getBackedgeTakenCount(L);
- const DataLayout &DL = L->getHeader()->getDataLayout();
- SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
-
- PHINode *ToHelpFold = nullptr;
- const SCEV *TermValueS = nullptr;
- bool MustDropPoison = false;
- auto InsertPt = L->getLoopPreheader()->getTerminator();
- for (PHINode &PN : L->getHeader()->phis()) {
- if (ToFold == &PN)
- continue;
-
- if (!SE.isSCEVable(PN.getType())) {
- LLVM_DEBUG(dbgs() << "IV of phi '" << PN
- << "' is not SCEV-able, not qualified for the "
- "terminating condition folding.\n");
- continue;
- }
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
- // Only speculate on affine AddRec
- if (!AddRec || !AddRec->isAffine()) {
- LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
- << "' is not an affine add recursion, not qualified "
- "for the terminating condition folding.\n");
- continue;
- }
-
- // Check that we can compute the value of AddRec on the exiting iteration
- // without soundness problems. evaluateAtIteration internally needs
- // to multiply the stride of the iteration number - which may wrap around.
- // The issue here is subtle because computing the result accounting for
- // wrap is insufficient. In order to use the result in an exit test, we
- // must also know that AddRec doesn't take the same value on any previous
- // iteration. The simplest case to consider is a candidate IV which is
- // narrower than the trip count (and thus original IV), but this can
- // also happen due to non-unit strides on the candidate IVs.
- if (!AddRec->hasNoSelfWrap() ||
- !SE.isKnownNonZero(AddRec->getStepRecurrence(SE)))
- continue;
-
- const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
- const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
- if (!Expander.isSafeToExpand(TermValueSLocal)) {
- LLVM_DEBUG(
- dbgs() << "Is not safe to expand terminating value for phi node" << PN
- << "\n");
- continue;
- }
-
- if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
- &TTI, InsertPt)) {
- LLVM_DEBUG(
- dbgs() << "Is too expensive to expand terminating value for phi node"
- << PN << "\n");
- continue;
- }
-
- // The candidate IV may have been otherwise dead and poison from the
- // very first iteration. If we can't disprove that, we can't use the IV.
- if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
- LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
- << PN << "\n");
- continue;
- }
-
- // The candidate IV may become poison on the last iteration. If this
- // value is not branched on, this is a well defined program. We're
- // about to add a new use to this IV, and we have to ensure we don't
- // insert UB which didn't previously exist.
- bool MustDropPoisonLocal = false;
- Instruction *PostIncV =
- cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
- if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
- &DT)) {
- LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
- << PN << "\n");
-
- // If this is a complex recurrance with multiple instructions computing
- // the backedge value, we might need to strip poison flags from all of
- // them.
- if (PostIncV->getOperand(0) != &PN)
- continue;
-
- // In order to perform the transform, we need to drop the poison generating
- // flags on this instruction (if any).
- MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
- }
-
- // We pick the last legal alternate IV. We could expore choosing an optimal
- // alternate IV if we had a decent heuristic to do so.
- ToHelpFold = &PN;
- TermValueS = TermValueSLocal;
- MustDropPoison = MustDropPoisonLocal;
- }
-
- LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
- << "Cannot find other AddRec IV to help folding\n";);
-
- LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
- << "\nFound loop that can fold terminating condition\n"
- << " BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n"
- << " TermCond: " << *TermCond << "\n"
- << " BrandInst: " << *BI << "\n"
- << " ToFold: " << *ToFold << "\n"
- << " ToHelpFold: " << *ToHelpFold << "\n");
-
- if (!ToFold || !ToHelpFold)
- return std::nullopt;
- return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
-}
-
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
const TargetTransformInfo &TTI,
@@ -7302,81 +7116,6 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}
}
- const bool EnableFormTerm = [&] {
- switch (AllowTerminatingConditionFoldingAfterLSR) {
- case cl::BOU_TRUE:
- return true;
- case cl::BOU_FALSE:
- return false;
- case cl::BOU_UNSET:
- return TTI.shouldFoldTerminatingConditionAfterLSR();
- }
- llvm_unreachable("Unhandled cl::boolOrDefault enum");
- }();
-
- if (EnableFormTerm) {
- if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI)) {
- auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
-
- Changed = true;
- NumTermFold++;
-
- BasicBlock *LoopPreheader = L->getLoopPreheader();
- BasicBlock *LoopLatch = L->getLoopLatch();
-
- (void)ToFold;
- LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
- << *ToFold << "\n"
- << "New term-cond phi-node:\n"
- << *ToHelpFold << "\n");
-
- Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
- (void)StartValue;
- Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
-
- // See comment in canFoldTermCondOfLoop on why this is sufficient.
- if (MustDrop)
- cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
-
- // SCEVExpander for both use in preheader and latch
- const DataLayout &DL = L->getHeader()->getDataLayout();
- SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
-
- assert(Expander.isSafeToExpand(TermValueS) &&
- "Terminating value was checked safe in canFoldTerminatingCondition");
-
- // Create new terminating value at loop preheader
- Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(),
- LoopPreheader->getTerminator());
-
- LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
- << *StartValue << "\n"
- << "Terminating value of new term-cond phi-node:\n"
- << *TermValue << "\n");
-
- // Create new terminating condition at loop latch
- BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
- ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
- IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
- Value *NewTermCond =
- LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
- "lsr_fold_term_cond.replaced_term_cond");
- // Swap successors to exit loop body if IV equals to new TermValue
- if (BI->getSuccessor(0) == L->getHeader())
- BI->swapSuccessors();
-
- LLVM_DEBUG(dbgs() << "Old term-cond:\n"
- << *OldTermCond << "\n"
- << "New term-cond:\n" << *NewTermCond << "\n");
-
- BI->setCondition(NewTermCond);
-
- Expander.clear();
- OldTermCond->eraseFromParent();
- DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
- }
- }
-
if (SalvageableDVIRecords.empty())
return Changed;
@@ -7435,6 +7174,8 @@ PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
char LoopStrengthReduce::ID = 0;
+char &llvm::LoopStrengthReduceID = LoopStrengthReduce::ID;
+
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
diff --git a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
new file mode 100644
index 00000000000000..27ca892ff8024d
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
@@ -0,0 +1,387 @@
+//===- LoopTermFold.cpp - Eliminate last use of IV in exit branch----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopTermFold.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include <cassert>
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-term-fold"
+
+STATISTIC(NumTermFold,
+ "Number of terminating condition fold recognized and performed");
+
+static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
+canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ const LoopInfo &LI, const TargetTransformInfo &TTI) {
+ if (!L->isInnermost()) {
+ LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
+ return std::nullopt;
+ }
+  // Only handle loops that are in loop-simplify form.
+ if (!L->isLoopSimplifyForm()) {
+ LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
+ return std::nullopt;
+ }
+
+ if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
+ LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
+ return std::nullopt;
+ }
+
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
+ if (!BI || BI->isUnconditional())
+ return std::nullopt;
+ auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!TermCond) {
+ LLVM_DEBUG(
+ dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
+ return std::nullopt;
+ }
+ if (!TermCond->hasOneUse()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Cannot replace terminating condition with more than one use\n");
+ return std::nullopt;
+ }
+
+ BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
+ Value *RHS = TermCond->getOperand(1);
+ if (!LHS || !L->isLoopInvariant(RHS))
+ // We could pattern match the inverse form of the icmp, but that is
+ // non-canonical, and this pass is running *very* late in the pipeline.
+ return std::nullopt;
+
+ // Find the IV used by the current exit condition.
+ PHINode *ToFold;
+ Value *ToFoldStart, *ToFoldStep;
+ if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
+ return std::nullopt;
+
+ // Ensure the simple recurrence is a part of the current loop.
+ if (ToFold->getParent() != L->getHeader())
+ return std::nullopt;
+
+ // If that IV isn't dead after we rewrite the exit condition in terms of
+ // another IV, there's no point in doing the transform.
+ if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
+ return std::nullopt;
+
+ // Inserting instructions in the preheader has a runtime cost, scale
+  // the allowed cost with the loop's trip count as best we can.
+ const unsigned ExpansionBudget = [&]() {
+ unsigned Budget = 2 * SCEVCheapExpansionBudget;
+ if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
+ return std::min(Budget, SmallTC);
+ if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
+ return std::min(Budget, *SmallTC);
+ // Unknown trip count, assume long running by default.
+ return Budget;
+ }();
+
+ const SCEV *BECount = SE.getBackedgeTakenCount(L);
+ const DataLayout &DL = L->getHeader()->getDataLayout();
+ SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
+
+ PHINode *ToHelpFold = nullptr;
+ const SCEV *TermValueS = nullptr;
+ bool MustDropPoison = false;
+ auto InsertPt = L->getLoopPreheader()->getTerminator();
+ for (PHINode &PN : L->getHeader()->phis()) {
+ if (ToFold == &PN)
+ continue;
+
+ if (!SE.isSCEVable(PN.getType())) {
+ LLVM_DEBUG(dbgs() << "IV of phi '" << PN
+ << "' is not SCEV-able, not qualified for the "
+ "terminating condition folding.\n");
+ continue;
+ }
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
+ // Only speculate on affine AddRec
+ if (!AddRec || !AddRec->isAffine()) {
+ LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
+ << "' is not an affine add recursion, not qualified "
+ "for the terminating condition folding.\n");
+ continue;
+ }
+
+ // Check that we can compute the value of AddRec on the exiting iteration
+ // without soundness problems. evaluateAtIteration internally needs
+    // to multiply the stride by the iteration number - which may wrap around.
+ // The issue here is subtle because computing the result accounting for
+ // wrap is insufficient. In order to use the result in an exit test, we
+ // must also know that AddRec doesn't take the same value on any previous
+ // iteration. The simplest case to consider is a candidate IV which is
+ // narrower than the trip count (and thus original IV), but this can
+ // also happen due to non-unit strides on the candidate IVs.
+ if (!AddRec->hasNoSelfWrap() ||
+ !SE.isKnownNonZero(AddRec->getStepRecurrence(SE)))
+ continue;
+
+ const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
+ const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
+ if (!Expander.isSafeToExpand(TermValueSLocal)) {
+ LLVM_DEBUG(
+ dbgs() << "Is not safe to expand terminating value for phi node" << PN
+ << "\n");
+ continue;
+ }
+
+ if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
+ &TTI, InsertPt)) {
+ LLVM_DEBUG(
+ dbgs() << "Is too expensive to expand terminating value for phi node"
+ << PN << "\n");
+ continue;
+ }
+
+ // The candidate IV may have been otherwise dead and poison from the
+ // very first iteration. If we can't disprove that, we can't use the IV.
+ if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
+ LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
+ << PN << "\n");
+ continue;
+ }
+
+ // The candidate IV may become poison on the last iteration. If this
+ // value is not branched on, this is a well defined program. We're
+ // about to add a new use to this IV, and we have to ensure we don't
+ // insert UB which didn't previously exist.
+ bool MustDropPoisonLocal = false;
+ Instruction *PostIncV =
+ cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
+ if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
+ &DT)) {
+ LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
+ << PN << "\n");
+
+ // If this is a complex recurrance with multiple instructions computing
+ // the backedge value, we might need to strip poison flags from all of
+ // them.
+ if (PostIncV->getOperand(0) != &PN)
+ continue;
+
+ // In order to perform the transform, we need to drop the poison generating
+ // flags on this instruction (if any).
+ MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
+ }
+
+ // We pick the last legal alternate IV. We could explore choosing an
+ // optimal alternate IV if we had a decent heuristic to do so.
+ ToHelpFold = &PN;
+ TermValueS = TermValueSLocal;
+ MustDropPoison = MustDropPoisonLocal;
+ }
+
+ LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
+ << "Cannot find other AddRec IV to help folding\n";);
+
+ LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
+ << "\nFound loop that can fold terminating condition\n"
+ << " BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n"
+ << " TermCond: " << *TermCond << "\n"
+ << " BrandInst: " << *BI << "\n"
+ << " ToFold: " << *ToFold << "\n"
+ << " ToHelpFold: " << *ToHelpFold << "\n");
+
+ if (!ToFold || !ToHelpFold)
+ return std::nullopt;
+ return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
+}
+
+static bool RunTermFold(Loop *L, ScalarEvolution &SE,
+ DominatorTree &DT, LoopInfo &LI,
+ const TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI,
+ MemorySSA *MSSA) {
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
+ auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI);
+ if (!Opt)
+ return false;
+
+ auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
+
+ NumTermFold++;
+
+ BasicBlock *LoopPreheader = L->getLoopPreheader();
+ BasicBlock *LoopLatch = L->getLoopLatch();
+
+ (void)ToFold;
+ LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
+ << *ToFold << "\n"
+ << "New term-cond phi-node:\n"
+ << *ToHelpFold << "\n");
+
+ Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
+ (void)StartValue;
+ Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
+
+ // See comment in canFoldTermCondOfLoop on why this is sufficient.
+ if (MustDrop)
+ cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
+
+ // SCEVExpander for use in both the preheader and the latch
+ const DataLayout &DL = L->getHeader()->getDataLayout();
+ SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
+
+ assert(Expander.isSafeToExpand(TermValueS) &&
+ "Terminating value was checked safe in canFoldTerminatingCondition");
+
+ // Create new terminating value at loop preheader
+ Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(),
+ LoopPreheader->getTerminator());
+
+ LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
+ << *StartValue << "\n"
+ << "Terminating value of new term-cond phi-node:\n"
+ << *TermValue << "\n");
+
+ // Create new terminating condition at loop latch
+ BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
+ ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
+ IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
+ Value *NewTermCond =
+ LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
+ "lsr_fold_term_cond.replaced_term_cond");
+ // Swap successors to exit loop body if IV equals to new TermValue
+ if (BI->getSuccessor(0) == L->getHeader())
+ BI->swapSuccessors();
+
+ LLVM_DEBUG(dbgs() << "Old term-cond:\n"
+ << *OldTermCond << "\n"
+ << "New term-cond:\n" << *NewTermCond << "\n");
+
+ BI->setCondition(NewTermCond);
+
+ Expander.clear();
+ OldTermCond->eraseFromParent();
+ DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
+ return true;
+}
+
+namespace {
+
+class LoopTermFold : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+
+ LoopTermFold();
+
+private:
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+} // end anonymous namespace
+
+LoopTermFold::LoopTermFold() : LoopPass(ID) {
+ initializeLoopTermFoldPass(*PassRegistry::getPassRegistry());
+}
+
+void LoopTermFold::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We split critical edges, so we change the CFG. However, we do update
+ // many analyses if they are around.
+ AU.addPreservedID(LoopSimplifyID);
+
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+}
+
+
+bool LoopTermFold::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ if (skipLoop(L))
+ return false;
+
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent());
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
+ auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ MemorySSA *MSSA = nullptr;
+ if (MSSAAnalysis)
+ MSSA = &MSSAAnalysis->getMSSA();
+ return RunTermFold(L, SE, DT, LI, TTI, TLI, MSSA);
+}
+
+PreservedAnalyses LoopTermFoldPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &) {
+ if (!RunTermFold(&L, AR.SE, AR.DT, AR.LI, AR.TTI, AR.TLI, AR.MSSA))
+ return PreservedAnalyses::all();
+
+ auto PA = getLoopPassPreservedAnalyses();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
+}
+
+char LoopTermFold::ID = 0;
+char &llvm::LoopTermFoldID = LoopTermFold::ID;
+
+INITIALIZE_PASS_BEGIN(LoopTermFold, "loop-term-fold",
+ "Loop Terminator Folding", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopTermFold, "loop-term-fold",
+ "Loop Terminator Folding", false, false)
+
+Pass *llvm::createLoopTermFoldPass() { return new LoopTermFold(); }
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 86669e8c5aa49b..7aeee1d31f7e79 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -30,6 +30,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLegacyLICMPassPass(Registry);
initializeLoopDataPrefetchLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
+ initializeLoopTermFoldPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
initializeMergeICmpsLegacyPassPass(Registry);
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index df9cb5de5d7682..44c270fdc3c257 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -45,6 +45,7 @@
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
+; CHECK-NEXT: Loop Terminator Folding
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Merge contiguous icmps into a memcmp
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
index 9c11bd064ad47c..cadee94ff40960 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; RUN: opt < %s -passes=loop-reduce,loop-term-fold -S | FileCheck %s
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/term-fold-crash.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/term-fold-crash.ll
index 8ca7f0010bbbe5..9fb240684d232b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/term-fold-crash.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/term-fold-crash.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S -passes=loop-reduce -mtriple=riscv64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: opt -S -passes=loop-reduce,loop-term-fold -mtriple=riscv64-unknown-linux-gnu < %s | FileCheck %s
define void @test(ptr %p, i8 %arg, i32 %start) {
; CHECK-LABEL: define void @test(
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
index 2d3d3a4b72a1ac..89ddba3343ffa2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; REQUIRES: asserts
-; RUN: opt < %s -passes="loop-reduce" -S -debug -lsr-term-fold 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=loop-reduce,loop-term-fold -S -debug 2>&1 | FileCheck %s
target datalayout = "e-p:64:64:64-n64"
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
index 7299a014b79835..6f34dc843ae1ee 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes="loop-reduce" -S -lsr-term-fold | FileCheck %s
+; RUN: opt < %s -passes="loop-reduce,loop-term-fold" -S | FileCheck %s
target datalayout = "e-p:64:64:64-n64"
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-unreachable-bb-phi-node.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-unreachable-bb-phi-node.ll
index 1454535b52bccb..67a71496e4cec8 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-unreachable-bb-phi-node.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-unreachable-bb-phi-node.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-reduce -S -lsr-term-fold | FileCheck %s
+; RUN: opt < %s -passes=loop-reduce,loop-term-fold -S | FileCheck %s
; This test used to crash due to matchSimpleRecurrence matching the simple
; recurrence in pn-loop when evaluating unrelated-loop. Since unrelated-loop
@@ -13,9 +13,10 @@ define void @phi_node_different_bb() {
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[TMP2]], 1
; CHECK-NEXT: br i1 [[TMP3]], label [[PN_LOOP]], label [[UNRELATED_LOOP_PREHEADER:%.*]]
; CHECK: unrelated-loop.preheader:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP2]], [[PN_LOOP]] ]
; CHECK-NEXT: br label [[UNRELATED_LOOP:%.*]]
; CHECK: unrelated-loop:
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[DOTLCSSA]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[END:%.*]], label [[UNRELATED_LOOP]]
; CHECK: end:
; CHECK-NEXT: ret void
>From d82e407d836f71fd3cf176a505a170201f663fa8 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Wed, 14 Aug 2024 13:36:08 -0700
Subject: [PATCH 2/5] (left as a separate commit so alternative is visible,
will be squashed)
The problem with the ID scheme is that we don't have any other occurrences
of such IDs defined outside the CodeGen directory. I'm worried about
a possible link dependency in some build configuration.
---
llvm/include/llvm/CodeGen/Passes.h | 3 ---
llvm/include/llvm/CodeGen/TargetPassConfig.h | 3 +++
llvm/lib/CodeGen/TargetPassConfig.cpp | 2 ++
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 3 +--
llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2 --
llvm/lib/Transforms/Scalar/LoopTermFold.cpp | 1 -
6 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index db405a9e571c29..20273d069bf053 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -201,9 +201,6 @@ namespace llvm {
// register allocator.
extern char &InitUndefID;
- extern char &LoopStrengthReduceID;
- extern char &LoopTermFoldID;
-
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index d00e0bed91a457..ecff65c483d8d8 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -140,6 +140,9 @@ class TargetPassConfig : public ImmutablePass {
/// callers.
bool RequireCodeGenSCCOrder = false;
+ /// Enable LoopTailFold immediately after LSR
+ bool EnableLoopTailFold = false;
+
/// Add the actual instruction selection passes. This does not include
/// preparation passes on IR.
bool addCoreISelPasses();
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 1b0012b65b80d4..45ddc941b5e29b 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -828,6 +828,8 @@ void TargetPassConfig::addIRPasses() {
if (!DisableLSR) {
addPass(createCanonicalizeFreezeInLoopsPass());
addPass(createLoopStrengthReducePass());
+ if (EnableLoopTailFold)
+ addPass(createLoopTermFoldPass());
if (PrintLSR)
addPass(createPrintFunctionPass(dbgs(),
"\n\n*** Code after LSR ***\n"));
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 6f37270c0fb737..e0ef5c91520ab8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -336,6 +336,7 @@ class RISCVPassConfig : public TargetPassConfig {
if (TM.getOptLevel() != CodeGenOptLevel::None)
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
setEnableSinkAndFold(EnableSinkFold);
+ EnableLoopTailFold = true;
}
RISCVTargetMachine &getRISCVTargetMachine() const {
@@ -427,10 +428,8 @@ void RISCVPassConfig::addIRPasses() {
addPass(createInterleavedAccessPass());
addPass(createRISCVCodeGenPreparePass());
}
- insertPass(&LoopStrengthReduceID, &LoopTermFoldID);
TargetPassConfig::addIRPasses();
-
}
bool RISCVPassConfig::addPreISel() {
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ad49dc9e3dccf9..75754eb3a46714 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -7174,8 +7174,6 @@ PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
char LoopStrengthReduce::ID = 0;
-char &llvm::LoopStrengthReduceID = LoopStrengthReduce::ID;
-
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
diff --git a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
index 27ca892ff8024d..1729492c1c31a3 100644
--- a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
@@ -372,7 +372,6 @@ PreservedAnalyses LoopTermFoldPass::run(Loop &L, LoopAnalysisManager &AM,
}
char LoopTermFold::ID = 0;
-char &llvm::LoopTermFoldID = LoopTermFold::ID;
INITIALIZE_PASS_BEGIN(LoopTermFold, "loop-term-fold",
"Loop Terminator Folding", false, false)
>From 8aa3a263e41a89c5f595ed1cb7a3c21e90b742d7 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 16 Aug 2024 10:13:18 -0700
Subject: [PATCH 3/5] Address review comments
---
llvm/include/llvm/CodeGen/TargetPassConfig.h | 4 ++--
llvm/include/llvm/Transforms/Scalar/LoopTermFold.h | 2 +-
llvm/lib/CodeGen/TargetPassConfig.cpp | 2 +-
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +-
llvm/lib/Transforms/Scalar/LoopTermFold.cpp | 5 +----
5 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index ecff65c483d8d8..2f5951e3ec3bce 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -140,8 +140,8 @@ class TargetPassConfig : public ImmutablePass {
/// callers.
bool RequireCodeGenSCCOrder = false;
- /// Enable LoopTailFold immediately after LSR
- bool EnableLoopTailFold = false;
+ /// Enable LoopTermFold immediately after LSR
+ bool EnableLoopTermFold = false;
/// Add the actual instruction selection passes. This does not include
/// preparation passes on IR.
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h b/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
index 92c3ed35152756..974024c586aa80 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
@@ -1,4 +1,4 @@
-//===- LoopTermFold.h - Loop Term Fold Pass ---------*- C++ -*-===//
+//===- LoopTermFold.h - Loop Term Fold Pass ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 45ddc941b5e29b..1d52ebe6717f04 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -828,7 +828,7 @@ void TargetPassConfig::addIRPasses() {
if (!DisableLSR) {
addPass(createCanonicalizeFreezeInLoopsPass());
addPass(createLoopStrengthReducePass());
- if (EnableLoopTailFold)
+ if (EnableLoopTermFold)
addPass(createLoopTermFoldPass());
if (PrintLSR)
addPass(createPrintFunctionPass(dbgs(),
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index e0ef5c91520ab8..794df2212dfa53 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -336,7 +336,7 @@ class RISCVPassConfig : public TargetPassConfig {
if (TM.getOptLevel() != CodeGenOptLevel::None)
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
setEnableSinkAndFold(EnableSinkFold);
- EnableLoopTailFold = true;
+ EnableLoopTermFold = true;
}
RISCVTargetMachine &getRISCVTargetMachine() const {
diff --git a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
index 1729492c1c31a3..ba4dc3d872991c 100644
--- a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
@@ -324,12 +324,9 @@ LoopTermFold::LoopTermFold() : LoopPass(ID) {
}
void LoopTermFold::getAnalysisUsage(AnalysisUsage &AU) const {
- // We split critical edges, so we change the CFG. However, we do update
- // many analyses if they are around.
- AU.addPreservedID(LoopSimplifyID);
-
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
>From c7eba8608b4b56fb7a689e42091aa2722ee00ee2 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 16 Aug 2024 10:35:25 -0700
Subject: [PATCH 4/5] Clang format
---
llvm/lib/Transforms/Scalar/LoopTermFold.cpp | 55 ++++++++++-----------
1 file changed, 26 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
index ba4dc3d872991c..5a595bc53cb751 100644
--- a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
@@ -169,8 +169,8 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
continue;
}
- if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
- &TTI, InsertPt)) {
+ if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget, &TTI,
+ InsertPt)) {
LLVM_DEBUG(
dbgs() << "Is too expensive to expand terminating value for phi node"
<< PN << "\n");
@@ -180,8 +180,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
// The candidate IV may have been otherwise dead and poison from the
// very first iteration. If we can't disprove that, we can't use the IV.
if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
- LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
- << PN << "\n");
+ LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV " << PN << "\n");
continue;
}
@@ -191,11 +190,11 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
// insert UB which didn't previously exist.
bool MustDropPoisonLocal = false;
Instruction *PostIncV =
- cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
+ cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
&DT)) {
- LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
- << PN << "\n");
+ LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use" << PN
+ << "\n");
// If this is a complex recurrance with multiple instructions computing
// the backedge value, we might need to strip poison flags from all of
@@ -203,8 +202,8 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
if (PostIncV->getOperand(0) != &PN)
continue;
- // In order to perform the transform, we need to drop the poison generating
- // flags on this instruction (if any).
+ // In order to perform the transform, we need to drop the poison
+ // generating flags on this instruction (if any).
MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
}
@@ -231,11 +230,9 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
}
-static bool RunTermFold(Loop *L, ScalarEvolution &SE,
- DominatorTree &DT, LoopInfo &LI,
- const TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI,
- MemorySSA *MSSA) {
+static bool RunTermFold(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ LoopInfo &LI, const TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI, MemorySSA *MSSA) {
std::unique_ptr<MemorySSAUpdater> MSSAU;
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
@@ -253,9 +250,9 @@ static bool RunTermFold(Loop *L, ScalarEvolution &SE,
(void)ToFold;
LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
- << *ToFold << "\n"
- << "New term-cond phi-node:\n"
- << *ToHelpFold << "\n");
+ << *ToFold << "\n"
+ << "New term-cond phi-node:\n"
+ << *ToHelpFold << "\n");
Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
(void)StartValue;
@@ -277,24 +274,25 @@ static bool RunTermFold(Loop *L, ScalarEvolution &SE,
LoopPreheader->getTerminator());
LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
- << *StartValue << "\n"
- << "Terminating value of new term-cond phi-node:\n"
- << *TermValue << "\n");
+ << *StartValue << "\n"
+ << "Terminating value of new term-cond phi-node:\n"
+ << *TermValue << "\n");
// Create new terminating condition at loop latch
BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
Value *NewTermCond =
- LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
- "lsr_fold_term_cond.replaced_term_cond");
+ LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
+ "lsr_fold_term_cond.replaced_term_cond");
// Swap successors to exit loop body if IV equals to new TermValue
if (BI->getSuccessor(0) == L->getHeader())
BI->swapSuccessors();
LLVM_DEBUG(dbgs() << "Old term-cond:\n"
- << *OldTermCond << "\n"
- << "New term-cond:\n" << *NewTermCond << "\n");
+ << *OldTermCond << "\n"
+ << "New term-cond:\n"
+ << *NewTermCond << "\n");
BI->setCondition(NewTermCond);
@@ -337,7 +335,6 @@ void LoopTermFold::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MemorySSAWrapperPass>();
}
-
bool LoopTermFold::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
if (skipLoop(L))
return false;
@@ -370,14 +367,14 @@ PreservedAnalyses LoopTermFoldPass::run(Loop &L, LoopAnalysisManager &AM,
char LoopTermFold::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopTermFold, "loop-term-fold",
- "Loop Terminator Folding", false, false)
+INITIALIZE_PASS_BEGIN(LoopTermFold, "loop-term-fold", "Loop Terminator Folding",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_END(LoopTermFold, "loop-term-fold",
- "Loop Terminator Folding", false, false)
+INITIALIZE_PASS_END(LoopTermFold, "loop-term-fold", "Loop Terminator Folding",
+ false, false)
Pass *llvm::createLoopTermFoldPass() { return new LoopTermFold(); }
>From 1984624621e493695137121542cfcf734c0fa58d Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Sat, 17 Aug 2024 18:25:20 -0700
Subject: [PATCH 5/5] Remove codegen header per review comment
---
llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 1 -
llvm/lib/Transforms/Scalar/LoopTermFold.cpp | 1 -
2 files changed, 2 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 75754eb3a46714..a62b87fe2a53d4 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -81,7 +81,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
diff --git a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
index 5a595bc53cb751..12ef367adc43e3 100644
--- a/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopTermFold.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
More information about the llvm-commits
mailing list