[llvm] Revert "Redesign Straight-Line Strength Reduction (SLSR) (#162930)" (PR #169546)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 25 10:42:48 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Alan Li (lialan)
<details>
<summary>Changes</summary>
This reverts commit f67409c3ec7cd45c55656c8159bc42b3918f1116.
cc @<!-- -->fiigii
Including us, several separate groups are experiencing regressions with this change. This is the smallest reproducer pasted by @<!-- -->akuegel : https://github.com/llvm/llvm-project/pull/162930#issuecomment-3574307330
---
Patch is 175.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169546.diff
17 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp (+276-864)
- (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll (+11-9)
- (modified) llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/idot2.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/idot4s.ll (+79-82)
- (modified) llvm/test/CodeGen/AMDGPU/idot8u.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+234-234)
- (modified) llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll (+27-23)
- (modified) llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll (+196-133)
- (modified) llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll (+1-1)
- (modified) llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll (+5-5)
- (removed) llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/slsr-i8-gep.ll (-271)
- (removed) llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/slsr-var-delta.ll (-70)
- (removed) llvm/test/Transforms/StraightLineStrengthReduce/path-compression.ll (-35)
- (removed) llvm/test/Transforms/StraightLineStrengthReduce/pick-candidate.ll (-32)
- (modified) llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll (-120)
- (modified) llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll (-149)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index e5399bdd767e2..e94ad1999e32a 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -12,16 +12,17 @@
// effective in simplifying arithmetic statements derived from an unrolled loop.
// It can also simplify the logic of SeparateConstOffsetFromGEP.
//
-// There are many optimizations we can perform in the domain of SLSR.
-// We look for strength reduction candidates in the following forms:
+// There are many optimizations we can perform in the domain of SLSR. This file
+// for now contains only an initial step. Specifically, we look for strength
+// reduction candidates in the following forms:
//
-// Form Add: B + i * S
-// Form Mul: (B + i) * S
-// Form GEP: &B[i * S]
+// Form 1: B + i * S
+// Form 2: (B + i) * S
+// Form 3: &B[i * S]
//
// where S is an integer variable, and i is a constant integer. If we found two
// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
-// in a simpler way with respect to S1 (index delta). For example,
+// in a simpler way with respect to S1. For example,
//
// S1: X = B + i * S
// S2: Y = B + i' * S => X + (i' - i) * S
@@ -34,29 +35,8 @@
//
// Note: (i' - i) * S is folded to the extent possible.
//
-// For Add and GEP forms, we can also rewrite a candidate in a simpler way
-// with respect to other dominating candidates if their B or S are different
-// but other parts are the same. For example,
-//
-// Base Delta:
-// S1: X = B + i * S
-// S2: Y = B' + i * S => X + (B' - B)
-//
-// S1: X = &B [i * S]
-// S2: Y = &B'[i * S] => X + (B' - B)
-//
-// Stride Delta:
-// S1: X = B + i * S
-// S2: Y = B + i * S' => X + i * (S' - S)
-//
-// S1: X = &B[i * S]
-// S2: Y = &B[i * S'] => X + i * (S' - S)
-//
-// PS: Stride delta rewrite on Mul form is usually non-profitable, and Base
-// delta rewrite sometimes is profitable, so we do not support them on Mul.
-//
// This rewriting is in general a good idea. The code patterns we focus on
-// usually come from loop unrolling, so the delta is likely the same
+// usually come from loop unrolling, so (i' - i) * S is likely the same
// across iterations and can be reused. When that happens, the optimized form
// takes only one add starting from the second iteration.
//
@@ -67,14 +47,19 @@
// TODO:
//
// - Floating point arithmetics when fast math is enabled.
+//
+// - SLSR may decrease ILP at the architecture level. Targets that are very
+// sensitive to ILP may want to disable it. Having SLSR to consider ILP is
+// left as future work.
+//
+// - When (i' - i) is constant but i and i' are not, we could still perform
+// SLSR.
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@@ -101,19 +86,16 @@
#include <cstdint>
#include <limits>
#include <list>
-#include <queue>
#include <vector>
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "slsr"
-
static const unsigned UnknownAddressSpace =
std::numeric_limits<unsigned>::max();
DEBUG_COUNTER(StraightLineStrengthReduceCounter, "slsr-counter",
- "Controls whether rewriteCandidate is executed.");
+ "Controls whether rewriteCandidateWithBasis is executed.");
namespace {
@@ -160,23 +142,15 @@ class StraightLineStrengthReduce {
GEP, // &B[..][i * S][..]
};
- enum DKind {
- InvalidDelta, // reserved for the default constructor
- IndexDelta, // Delta is a constant from Index
- BaseDelta, // Delta is a constant or variable from Base
- StrideDelta, // Delta is a constant or variable from Stride
- };
-
Candidate() = default;
Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
- Instruction *I, const SCEV *StrideSCEV)
- : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I),
- StrideSCEV(StrideSCEV) {}
+ Instruction *I)
+ : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I) {}
Kind CandidateKind = Invalid;
const SCEV *Base = nullptr;
- // TODO: Swap Index and Stride's name.
+
// Note that Index and Stride of a GEP candidate do not necessarily have the
// same integer type. In that case, during rewriting, Stride will be
// sign-extended or truncated to Index's type.
@@ -203,164 +177,22 @@ class StraightLineStrengthReduce {
// Points to the immediate basis of this candidate, or nullptr if we cannot
// find any basis for this candidate.
Candidate *Basis = nullptr;
-
- DKind DeltaKind = InvalidDelta;
-
- // Store SCEV of Stride to compute delta from different strides
- const SCEV *StrideSCEV = nullptr;
-
- // Points to (Y - X) that will be used to rewrite this candidate.
- Value *Delta = nullptr;
-
- /// Cost model: Evaluate the computational efficiency of the candidate.
- ///
- /// Efficiency levels (higher is better):
- /// ZeroInst (5) - [Variable] or [Const]
- /// OneInstOneVar (4) - [Variable + Const] or [Variable * Const]
- /// OneInstTwoVar (3) - [Variable + Variable] or [Variable * Variable]
- /// TwoInstOneVar (2) - [Const + Const * Variable]
- /// TwoInstTwoVar (1) - [Variable + Const * Variable]
- enum EfficiencyLevel : unsigned {
- Unknown = 0,
- TwoInstTwoVar = 1,
- TwoInstOneVar = 2,
- OneInstTwoVar = 3,
- OneInstOneVar = 4,
- ZeroInst = 5
- };
-
- static EfficiencyLevel
- getComputationEfficiency(Kind CandidateKind, const ConstantInt *Index,
- const Value *Stride, const SCEV *Base = nullptr) {
- bool IsConstantBase = false;
- bool IsZeroBase = false;
- // When evaluating the efficiency of a rewrite, if the Base's SCEV is
- // not available, conservatively assume the base is not constant.
- if (auto *ConstBase = dyn_cast_or_null<SCEVConstant>(Base)) {
- IsConstantBase = true;
- IsZeroBase = ConstBase->getValue()->isZero();
- }
-
- bool IsConstantStride = isa<ConstantInt>(Stride);
- bool IsZeroStride =
- IsConstantStride && cast<ConstantInt>(Stride)->isZero();
- // All constants
- if (IsConstantBase && IsConstantStride)
- return ZeroInst;
-
- // (Base + Index) * Stride
- if (CandidateKind == Mul) {
- if (IsZeroStride)
- return ZeroInst;
- if (Index->isZero())
- return (IsConstantStride || IsConstantBase) ? OneInstOneVar
- : OneInstTwoVar;
-
- if (IsConstantBase)
- return IsZeroBase && (Index->isOne() || Index->isMinusOne())
- ? ZeroInst
- : OneInstOneVar;
-
- if (IsConstantStride) {
- auto *CI = cast<ConstantInt>(Stride);
- return (CI->isOne() || CI->isMinusOne()) ? OneInstOneVar
- : TwoInstOneVar;
- }
- return TwoInstTwoVar;
- }
-
- // Base + Index * Stride
- assert(CandidateKind == Add || CandidateKind == GEP);
- if (Index->isZero() || IsZeroStride)
- return ZeroInst;
-
- bool IsSimpleIndex = Index->isOne() || Index->isMinusOne();
-
- if (IsConstantBase)
- return IsZeroBase ? (IsSimpleIndex ? ZeroInst : OneInstOneVar)
- : (IsSimpleIndex ? OneInstOneVar : TwoInstOneVar);
-
- if (IsConstantStride)
- return IsZeroStride ? ZeroInst : OneInstOneVar;
-
- if (IsSimpleIndex)
- return OneInstTwoVar;
-
- return TwoInstTwoVar;
- }
-
- // Evaluate if the given delta is profitable to rewrite this candidate.
- bool isProfitableRewrite(const Value *Delta, const DKind DeltaKind) const {
- // This function cannot accurately evaluate the profit of whole expression
- // with context. A candidate (B + I * S) cannot express whether this
- // instruction needs to compute on its own (I * S), which may be shared
- // with other candidates or may need instructions to compute.
- // If the rewritten form has the same strength, still rewrite to
- // (X + Delta) since it may expose more CSE opportunities on Delta, as
- // unrolled loops usually have identical Delta for each unrolled body.
- //
- // Note, this function should only be used on Index Delta rewrite.
- // Base and Stride delta need context info to evaluate the register
- // pressure impact from variable delta.
- return getComputationEfficiency(CandidateKind, Index, Stride, Base) <=
- getRewriteEfficiency(Delta, DeltaKind);
- }
-
- // Evaluate the rewrite efficiency of this candidate with its Basis
- EfficiencyLevel getRewriteEfficiency() const {
- return Basis ? getRewriteEfficiency(Delta, DeltaKind) : Unknown;
- }
-
- // Evaluate the rewrite efficiency of this candidate with a given delta
- EfficiencyLevel getRewriteEfficiency(const Value *Delta,
- const DKind DeltaKind) const {
- switch (DeltaKind) {
- case BaseDelta: // [X + Delta]
- return getComputationEfficiency(
- CandidateKind,
- ConstantInt::get(cast<IntegerType>(Delta->getType()), 1), Delta);
- case StrideDelta: // [X + Index * Delta]
- return getComputationEfficiency(CandidateKind, Index, Delta);
- case IndexDelta: // [X + Delta * Stride]
- return getComputationEfficiency(CandidateKind, cast<ConstantInt>(Delta),
- Stride);
- default:
- return Unknown;
- }
- }
-
- bool isHighEfficiency() const {
- return getComputationEfficiency(CandidateKind, Index, Stride, Base) >=
- OneInstOneVar;
- }
-
- // Verify that this candidate has valid delta components relative to the
- // basis
- bool hasValidDelta(const Candidate &Basis) const {
- switch (DeltaKind) {
- case IndexDelta:
- // Index differs, Base and Stride must match
- return Base == Basis.Base && StrideSCEV == Basis.StrideSCEV;
- case StrideDelta:
- // Stride differs, Base and Index must match
- return Base == Basis.Base && Index == Basis.Index;
- case BaseDelta:
- // Base differs, Stride and Index must match
- return StrideSCEV == Basis.StrideSCEV && Index == Basis.Index;
- default:
- return false;
- }
- }
};
bool runOnFunction(Function &F);
private:
- // Fetch straight-line basis for rewriting C, update C.Basis to point to it,
- // and store the delta between C and its Basis in C.Delta.
- void setBasisAndDeltaFor(Candidate &C);
+ // Returns true if Basis is a basis for C, i.e., Basis dominates C and they
+ // share the same base and stride.
+ bool isBasisFor(const Candidate &Basis, const Candidate &C);
+
// Returns whether the candidate can be folded into an addressing mode.
- bool isFoldable(const Candidate &C, TargetTransformInfo *TTI);
+ bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
+ const DataLayout *DL);
+
+ // Returns true if C is already in a simplest form and not worth being
+ // rewritten.
+ bool isSimplestForm(const Candidate &C);
// Checks whether I is in a candidate form. If so, adds all the matching forms
// to Candidates, and tries to find the immediate basis for each of them.
@@ -384,6 +216,12 @@ class StraightLineStrengthReduce {
// Allocate candidates and find bases for GetElementPtr instructions.
void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
+ // A helper function that scales Idx with ElementSize before invoking
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
+ Value *S, uint64_t ElementSize,
+ Instruction *I);
+
// Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
// basis.
void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
@@ -391,7 +229,13 @@ class StraightLineStrengthReduce {
Instruction *I);
// Rewrites candidate C with respect to Basis.
- void rewriteCandidate(const Candidate &C);
+ void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
+
+ // A helper function that factors ArrayIdx to a product of a stride and a
+ // constant index, and invokes allocateCandidatesAndFindBasis with the
+ // factorings.
+ void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
+ GetElementPtrInst *GEP);
// Emit code that computes the "bump" from Basis to C.
static Value *emitBump(const Candidate &Basis, const Candidate &C,
@@ -403,203 +247,12 @@ class StraightLineStrengthReduce {
TargetTransformInfo *TTI = nullptr;
std::list<Candidate> Candidates;
- // Map from SCEV to instructions that represent the value,
- // instructions are sorted in depth-first order.
- DenseMap<const SCEV *, SmallSetVector<Instruction *, 2>> SCEVToInsts;
-
- // Record the dependency between instructions. If C.Basis == B, we would have
- // {B.Ins -> {C.Ins, ...}}.
- MapVector<Instruction *, std::vector<Instruction *>> DependencyGraph;
-
- // Map between each instruction and its possible candidates.
- DenseMap<Instruction *, SmallVector<Candidate *, 3>> RewriteCandidates;
-
- // All instructions that have candidates sort in topological order based on
- // dependency graph, from roots to leaves.
- std::vector<Instruction *> SortedCandidateInsts;
-
- // Record all instructions that are already rewritten and will be removed
- // later.
- std::vector<Instruction *> DeadInstructions;
-
- // Classify candidates against Delta kind
- class CandidateDictTy {
- public:
- using CandsTy = SmallVector<Candidate *, 8>;
- using BBToCandsTy = DenseMap<const BasicBlock *, CandsTy>;
-
- private:
- // Index delta Basis must have the same (Base, StrideSCEV, Inst.Type)
- using IndexDeltaKeyTy = std::tuple<const SCEV *, const SCEV *, Type *>;
- DenseMap<IndexDeltaKeyTy, BBToCandsTy> IndexDeltaCandidates;
-
- // Base delta Basis must have the same (StrideSCEV, Index, Inst.Type)
- using BaseDeltaKeyTy = std::tuple<const SCEV *, ConstantInt *, Type *>;
- DenseMap<BaseDeltaKeyTy, BBToCandsTy> BaseDeltaCandidates;
-
- // Stride delta Basis must have the same (Base, Index, Inst.Type)
- using StrideDeltaKeyTy = std::tuple<const SCEV *, ConstantInt *, Type *>;
- DenseMap<StrideDeltaKeyTy, BBToCandsTy> StrideDeltaCandidates;
-
- public:
- // TODO: Disable index delta on GEP after we completely move
- // from typed GEP to PtrAdd.
- const BBToCandsTy *getCandidatesWithDeltaKind(const Candidate &C,
- Candidate::DKind K) const {
- assert(K != Candidate::InvalidDelta);
- if (K == Candidate::IndexDelta) {
- IndexDeltaKeyTy IndexDeltaKey(C.Base, C.StrideSCEV, C.Ins->getType());
- auto It = IndexDeltaCandidates.find(IndexDeltaKey);
- if (It != IndexDeltaCandidates.end())
- return &It->second;
- } else if (K == Candidate::BaseDelta) {
- BaseDeltaKeyTy BaseDeltaKey(C.StrideSCEV, C.Index, C.Ins->getType());
- auto It = BaseDeltaCandidates.find(BaseDeltaKey);
- if (It != BaseDeltaCandidates.end())
- return &It->second;
- } else {
- assert(K == Candidate::StrideDelta);
- StrideDeltaKeyTy StrideDeltaKey(C.Base, C.Index, C.Ins->getType());
- auto It = StrideDeltaCandidates.find(StrideDeltaKey);
- if (It != StrideDeltaCandidates.end())
- return &It->second;
- }
- return nullptr;
- }
-
- // Pointers to C must remain valid until CandidateDict is cleared.
- void add(Candidate &C) {
- Type *ValueType = C.Ins->getType();
- BasicBlock *BB = C.Ins->getParent();
- IndexDeltaKeyTy IndexDeltaKey(C.Base, C.StrideSCEV, ValueType);
- BaseDeltaKeyTy BaseDeltaKey(C.StrideSCEV, C.Index, ValueType);
- StrideDeltaKeyTy StrideDeltaKey(C.Base, C.Index, ValueType);
- IndexDeltaCandidates[IndexDeltaKey][BB].push_back(&C);
- BaseDeltaCandidates[BaseDeltaKey][BB].push_back(&C);
- StrideDeltaCandidates[StrideDeltaKey][BB].push_back(&C);
- }
- // Remove all mappings from set
- void clear() {
- IndexDeltaCandidates.clear();
- BaseDeltaCandidates.clear();
- StrideDeltaCandidates.clear();
- }
- } CandidateDict;
-
- const SCEV *getAndRecordSCEV(Value *V) {
- auto *S = SE->getSCEV(V);
- if (isa<Instruction>(V) && !(isa<SCEVCouldNotCompute>(S) ||
- isa<SCEVUnknown>(S) || isa<SCEVConstant>(S)))
- SCEVToInsts[S].insert(cast<Instruction>(V));
-
- return S;
- }
-
- // Get the nearest instruction before CI that represents the value of S,
- // return nullptr if no instruction is associated with S or S is not a
- // reusable expression.
- Value *getNearestValueOfSCEV(const SCEV *S, const Instruction *CI) const {
- if (isa<SCEVCouldNotCompute>(S))
- return nullptr;
-
- if (auto *SU = dyn_cast<SCEVUnknown>(S))
- return SU->getValue();
- if (auto *SC = dyn_cast<SCEVConstant>(S))
- return SC->getValue();
-
- auto It = SCEVToInsts.find(S);
- if (It == SCEVToInsts.end())
- return nullptr;
-
- // Instructions are sorted in depth-first order, so search for the nearest
- // instruction by walking the list in reverse order.
- for (Instruction *I : reverse(It->second))
- if (DT->dominates(I, CI))
- return I;
-
- return nullptr;
- }
-
- struct DeltaInfo {
- Candidate *Cand;
- Candidate::DKind DeltaKind;
- Value *Delta;
-
- DeltaInfo()
- : Cand(nullptr), DeltaKind(Candidate::InvalidDelta), Delta(nullptr) {}
- DeltaInfo(Candidate *Cand, Candidate::DKind DeltaKind, Value *Delta)
- : Cand(Cand), DeltaKind(DeltaKind), Delta(Delta) {}
- operator bool() const { return Cand != nullptr; }
- };
-
- friend raw_ostream &operator<<(raw_ostream &OS, const DeltaInfo &DI);
-
- DeltaInfo compressPath(Candidate &C, Candidate *Basis) const;
-
- Candidate *pickRewriteCandidate(Instruction *I) const;
- void sortCandidateInstructions();
- static Constant *getIndexDelta(Candidate &C, Candidate &Basis);
- static bool isSimilar(Candidate &C, Candidate &Basis, Candidate::DKind K);
-
- // Add Basis -> C in DependencyGraph and propagate
- // C.Stride and C.Delta's dependency to C
- void addDependency(Candidate &C, Candidate *Basis) {
- if (Basis)
- DependencyGraph[Basis->Ins].emplace_back(C.Ins);
-
- // If any candidate of Inst has a basis, then Inst will be rewritten,
- // C must be rewritten after rewriting Inst, so we need to propagate
- // the dependency to C
- auto PropagateDependency = [&](Instruction *Inst) {
- if (auto CandsIt = RewriteCandidates.find(Inst);
- CandsIt != RewriteCandidates.end() &&
- llvm::any_of(CandsIt->second,
- [](Candidate *Cand) { return Cand->Basis; }))
- DependencyGraph[Inst].emplace_back(C.Ins);
- };
-
- // If C has a variable delta and the delta is a candidate,
- // propagate its dependency to C
- if (auto *DeltaInst = dyn_cast_or_null<Instruction>(C.Delta))
- PropagateDependency(DeltaInst);
-
- // If the stride is a candidate, propagate its dependency to C
- if (auto *StrideInst = dyn_cast<Instruction>(C.Stride))
- PropagateDependency(StrideInst);
- };
+ // Temporarily holds all instructions that are unlinked (but not deleted) by
+ // rewriteCandidateWithBasis. These instructions will be actually removed
+ // after all rewriting finishes.
+ std::vector<Instruction *> UnlinkedInstructions;
};
-inline raw_ostream &operator<<(raw_ostream &OS,
- ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169546
More information about the llvm-commits
mailing list