[llvm] change contents of ScalarEvolution from private to protected (PR #83052)
Joshua Ferguson via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 12:32:49 PST 2024
https://github.com/skewballfox updated https://github.com/llvm/llvm-project/pull/83052
>From eea887cf6be39856fa441ed48f72c1c9177a76a6 Mon Sep 17 00:00:00 2001
From: Joshua Ferguson <joshua.ferguson.273 at gmail.com>
Date: Sun, 25 Feb 2024 14:06:02 -0600
Subject: [PATCH 1/7] mainly pushing to switch machines
---
llvm/include/llvm/Analysis/ScalarEvolution.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 0880f9c65aa45d..1b03437de30c28 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1345,7 +1345,7 @@ class ScalarEvolution {
}
};
-private:
+protected:
/// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
/// Value is deleted.
class SCEVCallbackVH final : public CallbackVH {
>From e47436b767d635c14c10fc8c0bfc4fe30b8967d6 Mon Sep 17 00:00:00 2001
From: skewballfox <joshua.ferguson.273 at gmail.com>
Date: Thu, 29 Feb 2024 08:35:45 -0600
Subject: [PATCH 2/7] added AssumeLoopExits bool to SE, lifting MustExit code
into SE
---
llvm/include/llvm/Analysis/ScalarEvolution.h | 9 ++-
.../llvm/Analysis/Utils/EnzymeFunctionUtils.h | 71 +++++++++++++++++++
llvm/lib/Analysis/ScalarEvolution.cpp | 8 ++-
3 files changed, 84 insertions(+), 4 deletions(-)
create mode 100644 llvm/include/llvm/Analysis/Utils/EnzymeFunctionUtils.h
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 1b03437de30c28..3075358e95791f 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -460,6 +460,9 @@ class ScalarEvolution {
LoopComputable ///< The SCEV varies predictably with the loop.
};
+ bool AssumeLoopExists = false;
+ void setAssumeLoopExists();
+
/// An enum describing the relationship between a SCEV and a basic block.
enum BlockDisposition {
DoesNotDominateBlock, ///< The SCEV does not dominate the block.
@@ -1345,7 +1348,7 @@ class ScalarEvolution {
}
};
-protected:
+ private:
/// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
/// Value is deleted.
class SCEVCallbackVH final : public CallbackVH {
@@ -1364,7 +1367,7 @@ class ScalarEvolution {
/// The function we are analyzing.
Function &F;
-
+
/// Does the module have any calls to the llvm.experimental.guard intrinsic
/// at all? If this is false, we avoid doing work that will only help if
/// thare are guards present in the IR.
@@ -1765,7 +1768,7 @@ class ScalarEvolution {
/// an arbitrary expression as opposed to only constants.
const SCEV *computeSymbolicMaxBackedgeTakenCount(const Loop *L);
- // Helper functions for computeExitLimitFromCond to avoid exponential time
+// Helper functions for computeExitLimitFromCond to avoid exponential time
// complexity.
class ExitLimitCache {
diff --git a/llvm/include/llvm/Analysis/Utils/EnzymeFunctionUtils.h b/llvm/include/llvm/Analysis/Utils/EnzymeFunctionUtils.h
new file mode 100644
index 00000000000000..a211bdca6a47d6
--- /dev/null
+++ b/llvm/include/llvm/Analysis/Utils/EnzymeFunctionUtils.h
@@ -0,0 +1,71 @@
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+
+#include "llvm/IR/Function.h"
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <deque>
+
+
+// TODO note this doesn't go through [loop, unreachable], and we could get more
+// performance by doing this can consider doing some domtree magic potentially
+static inline llvm::SmallPtrSet<llvm::BasicBlock *, 4>
+getGuaranteedUnreachable(llvm::Function *F) {
+  // Collects the basic blocks of F that are terminated by `unreachable` or
+  // `resume`, plus (iterated to a fixed point) every block all of whose
+  // successors have already been collected. Blocks ending in a return are
+  // never collected.
+  //
+  // A non-return block with no successors at all is collected as well, since
+  // the all-successors test is vacuously true. Empty function => empty set.
+  llvm::SmallPtrSet<llvm::BasicBlock *, 4> knownUnreachables;
+
+  // Seed the worklist with every block; a block is queued again whenever one
+  // of its successors is newly added to the set.
+  std::deque<llvm::BasicBlock *> todo;
+  for (auto &BB : *F)
+    todo.push_back(&BB);
+
+  while (!todo.empty()) {
+    llvm::BasicBlock *next = todo.front();
+    todo.pop_front();
+
+    // Already classified; nothing new to learn.
+    if (knownUnreachables.count(next))
+      continue;
+
+    llvm::Instruction *term = next->getTerminator();
+
+    // Returning blocks are never part of the result.
+    if (llvm::isa<llvm::ReturnInst>(term))
+      continue;
+
+    // `unreachable` and `resume` terminators qualify outright.
+    // Assume resumes don't happen
+    // TODO consider EH
+    bool deadEnd = llvm::isa<llvm::UnreachableInst, llvm::ResumeInst>(term);
+
+    // Otherwise the block qualifies only if every successor already does.
+    if (!deadEnd) {
+      deadEnd = true;
+      for (llvm::BasicBlock *Succ : llvm::successors(next)) {
+        if (!knownUnreachables.count(Succ)) {
+          deadEnd = false;
+          break;
+        }
+      }
+    }
+    if (!deadEnd)
+      continue;
+
+    // Newly collected: its predecessors may now qualify, so revisit them.
+    knownUnreachables.insert(next);
+    for (llvm::BasicBlock *Pred : llvm::predecessors(next))
+      todo.push_back(Pred);
+  }
+
+  return knownUnreachables;
+}
\ No newline at end of file
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4b2db80bc1ec30..6dc59108f5e188 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -82,6 +82,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Analysis/Utils/EnzymeFunctionUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -509,6 +510,10 @@ const SCEV *ScalarEvolution::getVScale(Type *Ty) {
return S;
}
+void ScalarEvolution::setAssumeLoopExists() {
+ this->AssumeLoopExists=true;
+}
+
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
const SCEV *op, Type *ty)
: SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {}
@@ -7413,7 +7418,7 @@ bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
// A mustprogress loop without side effects must be finite.
// TODO: The check used here is very conservative. It's only *specific*
// side effects which are well defined in infinite loops.
- return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L));
+ return this->AssumeLoopExists || isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L));
}
const SCEV *ScalarEvolution::createSCEVIter(Value *V) {
@@ -13354,6 +13359,7 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
return getSizeOfExpr(ETy, Ty);
}
+
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//
>From f55e361a3ba1d4a5ca30f4b9719d23d57d273cc5 Mon Sep 17 00:00:00 2001
From: skewballfox <joshua.ferguson.273 at gmail.com>
Date: Thu, 29 Feb 2024 09:51:55 -0600
Subject: [PATCH 3/7] added MustExitcode for computeExitLimit
---
llvm/include/llvm/Analysis/ScalarEvolution.h | 7 ++--
.../llvm/Analysis/Utils/EnzymeFunctionUtils.h | 1 -
llvm/lib/Analysis/ScalarEvolution.cpp | 32 +++++++++++++++----
3 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 3075358e95791f..4cc1954c1233f6 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -462,6 +462,7 @@ class ScalarEvolution {
bool AssumeLoopExists = false;
void setAssumeLoopExists();
+ llvm::SmallPtrSet<llvm::BasicBlock *, 4> GuaranteedUnreachable;
/// An enum describing the relationship between a SCEV and a basic block.
enum BlockDisposition {
@@ -1348,7 +1349,7 @@ class ScalarEvolution {
}
};
- private:
+private:
/// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
/// Value is deleted.
class SCEVCallbackVH final : public CallbackVH {
@@ -1367,7 +1368,7 @@ class ScalarEvolution {
/// The function we are analyzing.
Function &F;
-
+
/// Does the module have any calls to the llvm.experimental.guard intrinsic
/// at all? If this is false, we avoid doing work that will only help if
/// thare are guards present in the IR.
@@ -1768,7 +1769,7 @@ class ScalarEvolution {
/// an arbitrary expression as opposed to only constants.
const SCEV *computeSymbolicMaxBackedgeTakenCount(const Loop *L);
-// Helper functions for computeExitLimitFromCond to avoid exponential time
+ // Helper functions for computeExitLimitFromCond to avoid exponential time
// complexity.
class ExitLimitCache {
diff --git a/llvm/include/llvm/Analysis/Utils/EnzymeFunctionUtils.h b/llvm/include/llvm/Analysis/Utils/EnzymeFunctionUtils.h
index a211bdca6a47d6..59032cbe6dddd4 100644
--- a/llvm/include/llvm/Analysis/Utils/EnzymeFunctionUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/EnzymeFunctionUtils.h
@@ -9,7 +9,6 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <deque>
-
// TODO note this doesn't go through [loop, unreachable], and we could get more
// performance by doing this can consider doing some domtree magic potentially
static inline llvm::SmallPtrSet<llvm::BasicBlock *, 4>
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 6dc59108f5e188..c1071f07b7f280 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -80,9 +80,9 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/Utils/EnzymeFunctionUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Analysis/Utils/EnzymeFunctionUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -510,9 +510,7 @@ const SCEV *ScalarEvolution::getVScale(Type *Ty) {
return S;
}
-void ScalarEvolution::setAssumeLoopExists() {
- this->AssumeLoopExists=true;
-}
+void ScalarEvolution::setAssumeLoopExists() { this->AssumeLoopExists = true; }
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
const SCEV *op, Type *ty)
@@ -7418,7 +7416,8 @@ bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
// A mustprogress loop without side effects must be finite.
// TODO: The check used here is very conservative. It's only *specific*
// side effects which are well defined in infinite loops.
- return this->AssumeLoopExists || isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L));
+ return this->AssumeLoopExists || isFinite(L) ||
+ (isMustProgress(L) && loopHasNoSideEffects(L));
}
const SCEV *ScalarEvolution::createSCEVIter(Value *V) {
@@ -8833,6 +8832,26 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
bool AllowPredicates) {
+ if (AssumeLoopExists) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (auto &ExitingBlock : ExitingBlocks) {
+ BasicBlock *Exit = nullptr;
+ for (auto *SBB : successors(ExitingBlock)) {
+ if (!L->contains(SBB)) {
+ if (GuaranteedUnreachable.count(SBB))
+ continue;
+ Exit = SBB;
+ break;
+ }
+ }
+ if (!Exit)
+ ExitingBlock = nullptr;
+ }
+ ExitingBlocks.erase(
+ std::remove(ExitingBlocks.begin(), ExitingBlocks.end(), nullptr),
+ ExitingBlocks.end());
+ }
assert(L->contains(ExitingBlock) && "Exit count for non-loop block?");
// If our exiting block does not dominate the latch, then its connection with
// loop's exit limit may be far from trivial.
@@ -8858,6 +8877,8 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
BasicBlock *Exit = nullptr;
for (auto *SBB : successors(ExitingBlock))
if (!L->contains(SBB)) {
+ if (AssumeLoopExists and GuaranteedUnreachable.count(SBB))
+ continue;
if (Exit) // Multiple exit successors.
return getCouldNotCompute();
Exit = SBB;
@@ -13359,7 +13380,6 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
return getSizeOfExpr(ETy, Ty);
}
-
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//
>From 8e85c0653be244e036e68eb31a4022ff05b23257 Mon Sep 17 00:00:00 2001
From: skewballfox <joshua.ferguson.273 at gmail.com>
Date: Thu, 29 Feb 2024 10:33:22 -0600
Subject: [PATCH 4/7] added enzyme mustExit code to
computeExitLimitFromSingleExitSwitch
---
llvm/lib/Analysis/ScalarEvolution.cpp | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index c1071f07b7f280..d28436e02466be 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9264,8 +9264,14 @@ ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
if (Switch->getDefaultDest() == ExitingBlock)
return getCouldNotCompute();
- assert(L->contains(Switch->getDefaultDest()) &&
- "Default case must not exit the loop!");
+ // Without AssumeLoopExists the default destination must stay in the loop.
+ // With it set, a default destination in GuaranteedUnreachable is exempt
+ // from that check.
+ if (!AssumeLoopExists ||
+ !GuaranteedUnreachable.count(Switch->getDefaultDest())) {
+ assert(L->contains(Switch->getDefaultDest()) &&
+ "Default case must not exit the loop!");
+ }
const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
>From 3f378b5c9370355e3b5fc66709df06ec4f3970f3 Mon Sep 17 00:00:00 2001
From: skewballfox <joshua.ferguson.273 at gmail.com>
Date: Thu, 29 Feb 2024 11:05:50 -0600
Subject: [PATCH 5/7] add enzyme must exit code to computeExitLimitFromCondImpl
---
llvm/lib/Analysis/ScalarEvolution.cpp | 109 ++++++++++++++++++++++++--
1 file changed, 104 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index d28436e02466be..62f8ddfa720812 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8949,10 +8949,104 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsOnlyExit, bool AllowPredicates) {
- // Handle BinOp conditions (And, Or).
- if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp(
- Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates))
- return *LimitFromBinOp;
+ if (!AssumeLoopExists) {
+ // Handle BinOp conditions (And, Or).
+ if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp(
+ Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates))
+ return *LimitFromBinOp;
+ } else {
+ // Check if the controlling expression for this loop is an And or Or.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+ if (BO->getOpcode() == Instruction::And) {
+ // Recurse on the operands of the and.
+ bool EitherMayExit = !ExitIfTrue;
+ ExitLimit EL0 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(0), ExitIfTrue,
+ ControlsOnlyExit && !EitherMayExit, AllowPredicates);
+ ExitLimit EL1 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(1), ExitIfTrue,
+ ControlsOnlyExit && !EitherMayExit, AllowPredicates);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (EitherMayExit) {
+ // Both conditions must be true for the loop to continue executing.
+ // Choose the less conservative count.
+ if (EL0.ExactNotTaken == getCouldNotCompute() ||
+ EL1.ExactNotTaken == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(EL0.ExactNotTaken,
+ EL1.ExactNotTaken);
+
+ if (EL0.ConstantMaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.ConstantMaxNotTaken;
+ else if (EL1.ConstantMaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.ConstantMaxNotTaken;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(EL0.ConstantMaxNotTaken,
+ EL1.ConstantMaxNotTaken);
+ } else {
+ // Both conditions must be true at the same time for the loop to exit.
+ // For now, be conservative.
+ if (EL0.ConstantMaxNotTaken == EL1.ConstantMaxNotTaken)
+ MaxBECount = EL0.ConstantMaxNotTaken;
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
+ }
+
+ // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
+ // to be more aggressive when computing BECount than when computing
+ // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
+ // EL1.ExactNotTaken to match, but for EL0.ConstantMaxNotTaken and
+ // EL1.ConstantMaxNotTaken to not.
+ if (isa<SCEVCouldNotCompute>(MaxBECount) &&
+ !isa<SCEVCouldNotCompute>(BECount))
+ MaxBECount = getConstant(getUnsignedRangeMax(BECount));
+
+ return ExitLimit(BECount, MaxBECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
+ }
+ if (BO->getOpcode() == Instruction::Or) {
+ // Recurse on the operands of the or.
+ bool EitherMayExit = ExitIfTrue;
+ ExitLimit EL0 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(0), ExitIfTrue,
+ ControlsOnlyExit && !EitherMayExit, AllowPredicates);
+ ExitLimit EL1 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(1), ExitIfTrue,
+ ControlsOnlyExit && !EitherMayExit, AllowPredicates);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (EitherMayExit) {
+ // Both conditions must be false for the loop to continue executing.
+ // Choose the less conservative count.
+ if (EL0.ExactNotTaken == getCouldNotCompute() ||
+ EL1.ExactNotTaken == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(EL0.ExactNotTaken,
+ EL1.ExactNotTaken);
+
+ if (EL0.ConstantMaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.ConstantMaxNotTaken;
+ else if (EL1.ConstantMaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.ConstantMaxNotTaken;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(EL0.ConstantMaxNotTaken,
+ EL1.ConstantMaxNotTaken);
+ } else {
+ // Both conditions must be false at the same time for the loop to
+ // exit. For now, be conservative.
+ if (EL0.ConstantMaxNotTaken == EL1.ConstantMaxNotTaken)
+ MaxBECount = EL0.ConstantMaxNotTaken;
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
+ }
+ return ExitLimit(BECount, MaxBECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
+ }
+ }
+ }
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
@@ -8973,12 +9067,17 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
// preserve the CFG and is temporarily leaving constant conditions
// in place.
if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
- if (ExitIfTrue == !CI->getZExtValue())
+ if (ExitIfTrue == !CI->getZExtValue()) {
// The backedge is always taken.
return getCouldNotCompute();
+ }
// The backedge is never taken.
return getZero(CI->getType());
}
+ // The rest of this code was missing from the MustExitScalarEvolution
+ // overrides
+ // so this should never be reached if using enzyme
+ assert(!AssumeLoopExists);
// If we're exiting based on the overflow flag of an x.with.overflow intrinsic
// with a constant step, we can form an equivalent icmp predicate and figure
>From 14a0c6c187d61db2e017202283be20d17cc93ed7 Mon Sep 17 00:00:00 2001
From: skewballfox <joshua.ferguson.273 at gmail.com>
Date: Thu, 29 Feb 2024 11:31:39 -0600
Subject: [PATCH 6/7] implemented enzyme must exit code in
computeExitLimitFromICmp
---
llvm/lib/Analysis/ScalarEvolution.cpp | 183 +++++++++++++++++++++-----
1 file changed, 151 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 62f8ddfa720812..b6e88b563e2724 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9074,35 +9074,32 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
// The backedge is never taken.
return getZero(CI->getType());
}
- // The rest of this code was missing from the MustExitScalarEvolution
- // overrides
- // so this should never be reached if using enzyme
- assert(!AssumeLoopExists);
-
- // If we're exiting based on the overflow flag of an x.with.overflow intrinsic
- // with a constant step, we can form an equivalent icmp predicate and figure
- // out how many iterations will be taken before we exit.
- const WithOverflowInst *WO;
- const APInt *C;
- if (match(ExitCond, m_ExtractValue<1>(m_WithOverflowInst(WO))) &&
- match(WO->getRHS(), m_APInt(C))) {
- ConstantRange NWR =
- ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
- WO->getNoWrapKind());
- CmpInst::Predicate Pred;
- APInt NewRHSC, Offset;
- NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
- if (!ExitIfTrue)
- Pred = ICmpInst::getInversePredicate(Pred);
- auto *LHS = getSCEV(WO->getLHS());
- if (Offset != 0)
- LHS = getAddExpr(LHS, getConstant(Offset));
- auto EL = computeExitLimitFromICmp(L, Pred, LHS, getConstant(NewRHSC),
- ControlsOnlyExit, AllowPredicates);
- if (EL.hasAnyInfo())
- return EL;
- }
+ // block was never executed in MustExitScalarEvolution code
+ if (!AssumeLoopExists) {
+ // If we're exiting based on the overflow flag of an x.with.overflow
+ // intrinsic with a constant step, we can form an equivalent icmp predicate
+ // and figure out how many iterations will be taken before we exit.
+ const WithOverflowInst *WO;
+ const APInt *C;
+ if (match(ExitCond, m_ExtractValue<1>(m_WithOverflowInst(WO))) &&
+ match(WO->getRHS(), m_APInt(C))) {
+ ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
+ WO->getBinaryOp(), *C, WO->getNoWrapKind());
+ CmpInst::Predicate Pred;
+ APInt NewRHSC, Offset;
+ NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
+ if (!ExitIfTrue)
+ Pred = ICmpInst::getInversePredicate(Pred);
+ auto *LHS = getSCEV(WO->getLHS());
+ if (Offset != 0)
+ LHS = getAddExpr(LHS, getConstant(Offset));
+ auto EL = computeExitLimitFromICmp(L, Pred, LHS, getConstant(NewRHSC),
+ ControlsOnlyExit, AllowPredicates);
+ if (EL.hasAnyInfo())
+ return EL;
+ }
+ }
// If it's not an integer or pointer comparison then compute it the hard way.
return computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
}
@@ -9201,12 +9198,134 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
+ if (!AssumeLoopExists) {
+ ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsOnlyExit,
+ AllowPredicates);
+ if (EL.hasAnyInfo())
+ return EL;
+ } else {
+#define PROP_PHI(LHS) \
+ if (auto un = dyn_cast<SCEVUnknown>(LHS)) { \
+ if (auto pn = dyn_cast_or_null<PHINode>(un->getValue())) { \
+ const SCEV *sc = nullptr; \
+ bool failed = false; \
+ for (auto &a : pn->incoming_values()) { \
+ auto subsc = getSCEV(a); \
+ if (sc == nullptr) { \
+ sc = subsc; \
+ continue; \
+ } \
+ if (subsc != sc) { \
+ failed = true; \
+ break; \
+ } \
+ } \
+ if (!failed) { \
+ LHS = sc; \
+ } \
+ } \
+ }
+ PROP_PHI(LHS)
+ PROP_PHI(RHS)
+
+ // Try to evaluate any dependencies out of the loop.
+ LHS = getSCEVAtScope(LHS, L);
+ RHS = getSCEVAtScope(RHS, L);
+
+ // At this point, we would like to compute how many iterations of the
+ // loop the predicate will return true for these inputs.
+ if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
+ // If there is a loop-invariant, force it into the RHS.
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
- ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsOnlyExit,
- AllowPredicates);
- if (EL.hasAnyInfo())
- return EL;
+ // Simplify the operands before analyzing them.
+ (void)SimplifyICmpOperands(Pred, LHS, RHS);
+ // If we have a comparison of a chrec against a constant, try to use value
+ // ranges to answer this query.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (AddRec->getLoop() == L) {
+ // Form the constant range.
+ ConstantRange CompRange =
+ ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt());
+
+ const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ if (!isa<SCEVCouldNotCompute>(Ret))
+ return Ret;
+ }
+
+ switch (Pred) {
+ case ICmpInst::ICMP_NE: { // while (X != Y)
+ // Convert to: while (X-Y != 0)
+ ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsOnlyExit,
+ AllowPredicates);
+ if (EL.hasAnyInfo())
+ return EL;
+ break;
+ }
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (EL.hasAnyInfo())
+ return EL;
+ break;
+ }
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE: { // while (X < Y)
+ bool IsSigned = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
+
+ if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE) {
+ if (!isa<IntegerType>(RHS->getType()))
+ break;
+ SmallVector<const SCEV *, 2> sv = {
+ RHS, getConstant(
+ ConstantInt::get(cast<IntegerType>(RHS->getType()), 1))};
+ // Since this is not an infinite loop by induction, RHS cannot be
+ // int_max/uint_max. Therefore adding 1 does not wrap.
+ if (IsSigned)
+ RHS = getAddExpr(sv, SCEV::FlagNSW);
+ else
+ RHS = getAddExpr(sv, SCEV::FlagNUW);
+ }
+ ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsOnlyExit,
+ AllowPredicates);
+ if (EL.hasAnyInfo())
+ return EL;
+ break;
+ }
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE: { // while (X > Y)
+ // The signed predicates handled by this arm are SGT and SGE.
+ bool IsSigned = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
+ if (Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_UGE) {
+ if (!isa<IntegerType>(RHS->getType()))
+ break;
+ SmallVector<const SCEV *, 2> sv = {
+ RHS, getConstant(
+ ConstantInt::get(cast<IntegerType>(RHS->getType()), -1))};
+ // Since this is not an infinite loop by induction, RHS cannot be
+ // int_min/uint_min. Therefore subtracting 1 does not wrap.
+ // NOTE(review): the unsigned path adds an all-ones constant under
+ // FlagNUW; confirm that flag is sound for this encoding of "- 1".
+ RHS = getAddExpr(sv, IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+ }
+ ExitLimit EL = howManyGreaterThans(LHS, RHS, L, IsSigned,
+ ControlsOnlyExit, AllowPredicates);
+ if (EL.hasAnyInfo())
+ return EL;
+ break;
+ }
+ default:
+ break;
+ }
+ }
auto *ExhaustiveCount =
computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
>From abb0ab463de42b5b66261fed48de69d8980b30c0 Mon Sep 17 00:00:00 2001
From: skewballfox <joshua.ferguson.273 at gmail.com>
Date: Thu, 29 Feb 2024 14:30:36 -0600
Subject: [PATCH 7/7] add Enzyme changes to SE howManyLessThans
---
llvm/lib/Analysis/ScalarEvolution.cpp | 100 ++++++++++++++++----------
1 file changed, 63 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b6e88b563e2724..854cfec1e6805d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -12983,38 +12983,50 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS)) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
if (AR && AR->getLoop() == L && AR->isAffine()) {
- auto canProveNUW = [&]() {
- // We can use the comparison to infer no-wrap flags only if it fully
- // controls the loop exit.
- if (!ControlsOnlyExit)
- return false;
-
- if (!isLoopInvariant(RHS, L))
- return false;
-
- if (!isKnownNonZero(AR->getStepRecurrence(*this)))
- // We need the sequence defined by AR to strictly increase in the
- // unsigned integer domain for the logic below to hold.
- return false;
-
- const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType());
- const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType());
- // If RHS <=u Limit, then there must exist a value V in the sequence
- // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and
- // V <=u UINT_MAX. Thus, we must exit the loop before unsigned
- // overflow occurs. This limit also implies that a signed comparison
- // (in the wide bitwidth) is equivalent to an unsigned comparison as
- // the high bits on both sides must be zero.
- APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this));
- APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1);
- Limit = Limit.zext(OuterBitWidth);
- return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit);
- };
- auto Flags = AR->getNoWrapFlags();
- if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW())
- Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (!AssumeLoopExists) {
+ auto canProveNUW = [&]() {
+ // We can use the comparison to infer no-wrap flags only if it fully
+ // controls the loop exit.
+ if (!ControlsOnlyExit)
+ return false;
- setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
+ if (!isLoopInvariant(RHS, L))
+ return false;
+
+ if (!isKnownNonZero(AR->getStepRecurrence(*this)))
+ // We need the sequence defined by AR to strictly increase in the
+ // unsigned integer domain for the logic below to hold.
+ return false;
+
+ const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType());
+ const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType());
+ // If RHS <=u Limit, then there must exist a value V in the sequence
+ // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and
+ // V <=u UINT_MAX. Thus, we must exit the loop before unsigned
+ // overflow occurs. This limit also implies that a signed
+ // comparison (in the wide bitwidth) is equivalent to an unsigned
+ // comparison as the high bits on both sides must be zero.
+ APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this));
+ APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1);
+ Limit = Limit.zext(OuterBitWidth);
+ return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit);
+ };
+ auto Flags = AR->getNoWrapFlags();
+ if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
+ } else {
+ auto Flags = AR->getNoWrapFlags();
+ if (!hasFlags(Flags, SCEV::FlagNW) && canAssumeNoSelfWrap(AR)) {
+ Flags = setFlags(Flags, SCEV::FlagNW);
+
+ SmallVector<const SCEV *, 4> Operands{AR->operands()};
+ Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
+
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
+ }
+ }
if (AR->hasNoUnsignedWrap()) {
// Emulate what getZeroExtendExpr would have done during construction
// if we'd been able to infer the fact just above at that time.
@@ -13098,6 +13110,13 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
!loopHasNoAbnormalExits(L))
return getCouldNotCompute();
+ // This bailout is protecting the logic in computeMaxBECountForLT which
+ // has not yet been sufficiently audited or tested with negative strides.
+ // We used to filter out all known-non-positive cases here, we're in the
+ // process of being less restrictive bit by bit.
+ if (AssumeLoopExists && IsSigned && isKnownNonPositive(Stride))
+ return getCouldNotCompute();
+
if (!isKnownNonZero(Stride)) {
// If we have a step of zero, and RHS isn't invariant in L, we don't know
// if it might eventually be greater than start and if so, on which
@@ -13227,13 +13246,17 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (!BECount) {
auto canProveRHSGreaterThanEqualStart = [&]() {
auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
- const SCEV *GuardedRHS = applyLoopGuards(OrigRHS, L);
- const SCEV *GuardedStart = applyLoopGuards(OrigStart, L);
- if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart) ||
- isKnownPredicate(CondGE, GuardedRHS, GuardedStart))
- return true;
+ if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart)) {
+ if (AssumeLoopExists) {
+ return true;
+ }
+ const SCEV *GuardedRHS = applyLoopGuards(OrigRHS, L);
+ const SCEV *GuardedStart = applyLoopGuards(OrigStart, L);
+ if (isKnownPredicate(CondGE, GuardedRHS, GuardedStart))
+ return true;
+ }
// (RHS > Start - 1) implies RHS >= Start.
// * "RHS >= Start" is trivially equivalent to "RHS > Start - 1" if
// "Start - 1" doesn't overflow.
@@ -13370,7 +13393,10 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(ConstantMaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
ConstantMaxBECount = getConstant(getUnsignedRangeMax(BECount));
-
+ if (AssumeLoopExists) {
+ return ExitLimit(BECount, ConstantMaxBECount, ConstantMaxBECount, MaxOrZero,
+ Predicates);
+ }
const SCEV *SymbolicMaxBECount =
isa<SCEVCouldNotCompute>(BECount) ? ConstantMaxBECount : BECount;
return ExitLimit(BECount, ConstantMaxBECount, SymbolicMaxBECount, MaxOrZero,
More information about the llvm-commits
mailing list