[llvm] [LSR] Split the -lsr-term-fold transformation into its own pass (PR #104234)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 13:53:05 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Philip Reames (preames)
<details>
<summary>Changes</summary>
This transformation doesn't actually use any of the internal state of
LSR and recomputes all information from SCEV. Splitting it out makes
it easier to test.
Note that, long term, I would like to write a version of this transform
which *is* integrated with LSR's solver; if that happens, we'll
just delete the extra pass.
Integration-wise, I switched from using TTI to using a pass configuration
variable. This seems slightly more idiomatic, and it means we don't run
the extra logic on any target other than RISC-V.
---
Patch is 43.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104234.diff
25 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (-9)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (-2)
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (-5)
- (modified) llvm/include/llvm/CodeGen/TargetPassConfig.h (+3)
- (modified) llvm/include/llvm/InitializePasses.h (+1)
- (modified) llvm/include/llvm/LinkAllPasses.h (+1)
- (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1)
- (modified) llvm/include/llvm/Transforms/Scalar.h (+8)
- (added) llvm/include/llvm/Transforms/Scalar/LoopTermFold.h (+30)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (-4)
- (modified) llvm/lib/CodeGen/TargetPassConfig.cpp (+2)
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1)
- (modified) llvm/lib/Passes/PassRegistry.def (+1)
- (modified) llvm/lib/Target/RISCV/RISCVTargetMachine.cpp (+1)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (-4)
- (modified) llvm/lib/Transforms/Scalar/CMakeLists.txt (+1)
- (modified) llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (+1-262)
- (added) llvm/lib/Transforms/Scalar/LoopTermFold.cpp (+386)
- (modified) llvm/lib/Transforms/Scalar/Scalar.cpp (+1)
- (modified) llvm/test/CodeGen/RISCV/O3-pipeline.ll (+1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll (+1-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/term-fold-crash.ll (+1-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll (+1-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll (+1-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/lsr-unreachable-bb-phi-node.ll (+3-2)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 38e8b9da213974..2c998f3e3a73cd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -742,11 +742,6 @@ class TargetTransformInfo {
/// cost should return false, otherwise return true.
bool isNumRegsMajorCostOfLSR() const;
- /// Return true if LSR should attempts to replace a use of an otherwise dead
- /// primary IV in the latch condition with another IV available in the loop.
- /// When successful, makes the primary IV dead.
- bool shouldFoldTerminatingConditionAfterLSR() const;
-
/// Return true if LSR should drop a found solution if it's calculated to be
/// less profitable than the baseline.
bool shouldDropLSRSolutionIfLessProfitable() const;
@@ -1891,7 +1886,6 @@ class TargetTransformInfo::Concept {
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) = 0;
virtual bool isNumRegsMajorCostOfLSR() = 0;
- virtual bool shouldFoldTerminatingConditionAfterLSR() const = 0;
virtual bool shouldDropLSRSolutionIfLessProfitable() const = 0;
virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
virtual bool canMacroFuseCmp() = 0;
@@ -2370,9 +2364,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
bool isNumRegsMajorCostOfLSR() override {
return Impl.isNumRegsMajorCostOfLSR();
}
- bool shouldFoldTerminatingConditionAfterLSR() const override {
- return Impl.shouldFoldTerminatingConditionAfterLSR();
- }
bool shouldDropLSRSolutionIfLessProfitable() const override {
return Impl.shouldDropLSRSolutionIfLessProfitable();
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d208a710bb27fd..11b07ac0b7fc47 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -244,8 +244,6 @@ class TargetTransformInfoImplBase {
bool isNumRegsMajorCostOfLSR() const { return true; }
- bool shouldFoldTerminatingConditionAfterLSR() const { return false; }
-
bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
bool isProfitableLSRChainElement(Instruction *I) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 279cfb5aa47d6f..a57bd876561b60 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -394,11 +394,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
}
- bool shouldFoldTerminatingConditionAfterLSR() const {
- return TargetTransformInfoImplBase::
- shouldFoldTerminatingConditionAfterLSR();
- }
-
bool shouldDropLSRSolutionIfLessProfitable() const {
return TargetTransformInfoImplBase::shouldDropLSRSolutionIfLessProfitable();
}
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index d00e0bed91a457..ecff65c483d8d8 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -140,6 +140,9 @@ class TargetPassConfig : public ImmutablePass {
/// callers.
bool RequireCodeGenSCCOrder = false;
+ /// Enable LoopTailFold immediately after LSR
+ bool EnableLoopTailFold = false;
+
/// Add the actual instruction selection passes. This does not include
/// preparation passes on IR.
bool addCoreISelPasses();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 261912aab3076c..3bf1840cf9a0fe 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -168,6 +168,7 @@ void initializeLoopInfoWrapperPassPass(PassRegistry&);
void initializeLoopPassPass(PassRegistry&);
void initializeLoopSimplifyPass(PassRegistry&);
void initializeLoopStrengthReducePass(PassRegistry&);
+void initializeLoopTermFoldPass(PassRegistry&);
void initializeLoopUnrollPass(PassRegistry&);
void initializeLowerAtomicLegacyPassPass(PassRegistry&);
void initializeLowerConstantIntrinsicsPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index c00e425b131987..b490e816f740ca 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -88,6 +88,7 @@ namespace {
(void) llvm::createLoopExtractorPass();
(void) llvm::createLoopSimplifyPass();
(void) llvm::createLoopStrengthReducePass();
+ (void) llvm::createLoopTermFoldPass();
(void)llvm::createLoopUnrollPass();
(void) llvm::createLowerGlobalDtorsLegacyPass();
(void) llvm::createLowerInvokePass();
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 8e669ee5791239..05baf514fa7210 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -79,6 +79,7 @@ FUNCTION_PASS("win-eh-prepare", WinEHPreparePass())
#define LOOP_PASS(NAME, CREATE_PASS)
#endif
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
+LOOP_PASS("loop-term-fold", LoopTermFoldPass())
#undef LOOP_PASS
#ifndef MACHINE_MODULE_PASS
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 98d0adca355214..17f4327eb3e1ab 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -51,6 +51,14 @@ Pass *createLICMPass();
//
Pass *createLoopStrengthReducePass();
+//===----------------------------------------------------------------------===//
+//
+// LoopTermFold - This pass attempts to eliminate the last use of an IV in
+// a loop terminator instruction by rewriting it in terms of another IV.
+// Expected to be run immediately after LSR.
+//
+Pass *createLoopTermFoldPass();
+
//===----------------------------------------------------------------------===//
//
// LoopUnroll - This pass is a simple loop unrolling pass.
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h b/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
new file mode 100644
index 00000000000000..92c3ed35152756
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
@@ -0,0 +1,30 @@
+//===- LoopTermFold.h - Loop Term Fold Pass ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Loop;
+class LPMUpdater;
+
+class LoopTermFoldPass : public PassInfoMixin<LoopTermFoldPass> {
+public:
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index dcde78925bfa98..2c26493bd3f1ca 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -427,10 +427,6 @@ bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const {
return TTIImpl->isNumRegsMajorCostOfLSR();
}
-bool TargetTransformInfo::shouldFoldTerminatingConditionAfterLSR() const {
- return TTIImpl->shouldFoldTerminatingConditionAfterLSR();
-}
-
bool TargetTransformInfo::shouldDropLSRSolutionIfLessProfitable() const {
return TTIImpl->shouldDropLSRSolutionIfLessProfitable();
}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 1b0012b65b80d4..45ddc941b5e29b 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -828,6 +828,8 @@ void TargetPassConfig::addIRPasses() {
if (!DisableLSR) {
addPass(createCanonicalizeFreezeInLoopsPass());
addPass(createLoopStrengthReducePass());
+ if (EnableLoopTailFold)
+ addPass(createLoopTermFoldPass());
if (PrintLSR)
addPass(createPrintFunctionPass(dbgs(),
"\n\n*** Code after LSR ***\n"));
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 1859fde279c98d..2ba9e14b91c098 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -248,6 +248,7 @@
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LoopTermFold.h"
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 5ef8ba30944470..7cd944f3cee0b6 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -641,6 +641,7 @@ LOOP_PASS("loop-idiom-vectorize", LoopIdiomVectorizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
LOOP_PASS("loop-predication", LoopPredicationPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
+LOOP_PASS("loop-term-fold", LoopTermFoldPass())
LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b6884321f08411..e0ef5c91520ab8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -336,6 +336,7 @@ class RISCVPassConfig : public TargetPassConfig {
if (TM.getOptLevel() != CodeGenOptLevel::None)
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
setEnableSinkAndFold(EnableSinkFold);
+ EnableLoopTailFold = true;
}
RISCVTargetMachine &getRISCVTargetMachine() const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9c37a4f6ec2d04..b76c77fbee389d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -394,10 +394,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2);
- bool shouldFoldTerminatingConditionAfterLSR() const {
- return true;
- }
-
std::optional<unsigned> getMinPageSize() const { return 4096; }
};
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index ba09ebf8b04c4c..939a1457239567 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_component_library(LLVMScalarOpts
LoopRotation.cpp
LoopSimplifyCFG.cpp
LoopStrengthReduce.cpp
+ LoopTermFold.cpp
LoopUnrollPass.cpp
LoopUnrollAndJamPass.cpp
LoopVersioningLICM.cpp
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 91461d1ed27592..75754eb3a46714 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -81,6 +81,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -189,10 +190,6 @@ static cl::opt<unsigned> SetupCostDepthLimit(
"lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
cl::desc("The limit on recursion depth for LSRs setup cost"));
-static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
- "lsr-term-fold", cl::Hidden,
- cl::desc("Attempt to replace primary IV with other IV."));
-
static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
"lsr-drop-solution", cl::Hidden,
cl::desc("Attempt to drop solution if it is less profitable"));
@@ -205,9 +202,6 @@ static cl::opt<bool> DropScaledForVScale(
"lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
cl::desc("Avoid using scaled registers with vscale-relative addressing"));
-STATISTIC(NumTermFold,
- "Number of terminating condition fold recognized and performed");
-
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -7062,186 +7056,6 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
return nullptr;
}
-static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
-canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
- const LoopInfo &LI, const TargetTransformInfo &TTI) {
- if (!L->isInnermost()) {
- LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
- return std::nullopt;
- }
- // Only inspect on simple loop structure
- if (!L->isLoopSimplifyForm()) {
- LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
- return std::nullopt;
- }
-
- if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
- LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
- return std::nullopt;
- }
-
- BasicBlock *LoopLatch = L->getLoopLatch();
- BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
- if (!BI || BI->isUnconditional())
- return std::nullopt;
- auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!TermCond) {
- LLVM_DEBUG(
- dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
- return std::nullopt;
- }
- if (!TermCond->hasOneUse()) {
- LLVM_DEBUG(
- dbgs()
- << "Cannot replace terminating condition with more than one use\n");
- return std::nullopt;
- }
-
- BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
- Value *RHS = TermCond->getOperand(1);
- if (!LHS || !L->isLoopInvariant(RHS))
- // We could pattern match the inverse form of the icmp, but that is
- // non-canonical, and this pass is running *very* late in the pipeline.
- return std::nullopt;
-
- // Find the IV used by the current exit condition.
- PHINode *ToFold;
- Value *ToFoldStart, *ToFoldStep;
- if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
- return std::nullopt;
-
- // Ensure the simple recurrence is a part of the current loop.
- if (ToFold->getParent() != L->getHeader())
- return std::nullopt;
-
- // If that IV isn't dead after we rewrite the exit condition in terms of
- // another IV, there's no point in doing the transform.
- if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
- return std::nullopt;
-
- // Inserting instructions in the preheader has a runtime cost, scale
- // the allowed cost with the loops trip count as best we can.
- const unsigned ExpansionBudget = [&]() {
- unsigned Budget = 2 * SCEVCheapExpansionBudget;
- if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
- return std::min(Budget, SmallTC);
- if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
- return std::min(Budget, *SmallTC);
- // Unknown trip count, assume long running by default.
- return Budget;
- }();
-
- const SCEV *BECount = SE.getBackedgeTakenCount(L);
- const DataLayout &DL = L->getHeader()->getDataLayout();
- SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
-
- PHINode *ToHelpFold = nullptr;
- const SCEV *TermValueS = nullptr;
- bool MustDropPoison = false;
- auto InsertPt = L->getLoopPreheader()->getTerminator();
- for (PHINode &PN : L->getHeader()->phis()) {
- if (ToFold == &PN)
- continue;
-
- if (!SE.isSCEVable(PN.getType())) {
- LLVM_DEBUG(dbgs() << "IV of phi '" << PN
- << "' is not SCEV-able, not qualified for the "
- "terminating condition folding.\n");
- continue;
- }
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
- // Only speculate on affine AddRec
- if (!AddRec || !AddRec->isAffine()) {
- LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
- << "' is not an affine add recursion, not qualified "
- "for the terminating condition folding.\n");
- continue;
- }
-
- // Check that we can compute the value of AddRec on the exiting iteration
- // without soundness problems. evaluateAtIteration internally needs
- // to multiply the stride of the iteration number - which may wrap around.
- // The issue here is subtle because computing the result accounting for
- // wrap is insufficient. In order to use the result in an exit test, we
- // must also know that AddRec doesn't take the same value on any previous
- // iteration. The simplest case to consider is a candidate IV which is
- // narrower than the trip count (and thus original IV), but this can
- // also happen due to non-unit strides on the candidate IVs.
- if (!AddRec->hasNoSelfWrap() ||
- !SE.isKnownNonZero(AddRec->getStepRecurrence(SE)))
- continue;
-
- const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
- const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
- if (!Expander.isSafeToExpand(TermValueSLocal)) {
- LLVM_DEBUG(
- dbgs() << "Is not safe to expand terminating value for phi node" << PN
- << "\n");
- continue;
- }
-
- if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
- &TTI, InsertPt)) {
- LLVM_DEBUG(
- dbgs() << "Is too expensive to expand terminating value for phi node"
- << PN << "\n");
- continue;
- }
-
- // The candidate IV may have been otherwise dead and poison from the
- // very first iteration. If we can't disprove that, we can't use the IV.
- if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
- LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
- << PN << "\n");
- continue;
- }
-
- // The candidate IV may become poison on the last iteration. If this
- // value is not branched on, this is a well defined program. We're
- // about to add a new use to this IV, and we have to ensure we don't
- // insert UB which didn't previously exist.
- bool MustDropPoisonLocal = false;
- Instruction *PostIncV =
- cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
- if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
- &DT)) {
- LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
- << PN << "\n");
-
- // If this is a complex recurrance with multiple instructions computing
- // the backedge value, we might need to strip poison flags from all of
- // them.
- if (PostIncV->getOperand(0) != &PN)
- continue;
-
- // In order to perform the transform, we need to drop the poison generating
- // flags on this instruction (if any).
- MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
- }
-
- // We pick the last legal alternate IV. We could expore choosing an optimal
- // alternate IV if we had a decent heuristic to do so.
- ToHelpFold = &PN;
- TermValueS = TermValueSLocal;
- MustDropPoison = MustDropPoisonLocal;
- }
-
- LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
- << "Cannot find other AddRec IV to help folding\n";);
-
- LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
- << "\nFound loop that can fold terminati...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/104234
More information about the llvm-commits
mailing list