[llvm] [LoopFusion] Extending SIV to handle separate loops (PR #146383)
Alireza Torabian via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 15:54:15 PDT 2025
https://github.com/1997alireza updated https://github.com/llvm/llvm-project/pull/146383
>From 81d03000c5eb4c78377152c111cf103beabf0b26 Mon Sep 17 00:00:00 2001
From: Alireza Torabian <alireza.torabian at huawei.com>
Date: Mon, 24 Feb 2025 11:53:53 -0500
Subject: [PATCH 1/2] [DependenceAnalysis] Extending SIV to handle fusable
loops
When there is a dependence between two memory instructions in separate
fusable loops, the SIV tests can now analyze the pair and compute the
direction and the distance of the dependence. Two loop levels are considered
fusable if they have the same trip count and the same nesting depth.
---
.../llvm/Analysis/DependenceAnalysis.h | 185 +++++--
llvm/lib/Analysis/DependenceAnalysis.cpp | 459 ++++++++++++------
.../Analysis/DependenceAnalysis/SIDLoops.ll | 262 ++++++++++
3 files changed, 700 insertions(+), 206 deletions(-)
create mode 100644 llvm/test/Analysis/DependenceAnalysis/SIDLoops.ll
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index f66c79d915665..744e7c01a83f6 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -82,6 +82,16 @@ class LLVM_ABI Dependence {
/// Dependence::DVEntry - Each level in the distance/direction vector
/// has a direction (or perhaps a union of several directions), and
/// perhaps a distance.
+ /// The dependency information could be across a single loop level or across
+ /// two separate levels that have the same trip count and nesting depth,
+ /// which helps to identify candidates for loop fusion.
+ /// For example, loops b and c have the same iteration count and depth:
+ /// for (a = ...) {
+ /// for (b = 0; b < 10; b++) {
+ /// }
+ /// for (c = 0; c < 10; c++) {
+ /// }
+ /// }
struct DVEntry {
enum : unsigned char {
NONE = 0,
@@ -144,12 +154,25 @@ class LLVM_ABI Dependence {
/// source and destination of the dependence.
virtual unsigned getLevels() const { return 0; }
- /// getDirection - Returns the direction associated with a particular level.
- virtual unsigned getDirection(unsigned Level) const { return DVEntry::ALL; }
+ /// getSIDStrictLevels - Returns the number of separate SID loops surrounding
+ /// the source and destination of the dependence.
+ virtual unsigned getSIDStrictLevels() const { return 0; }
- /// getDistance - Returns the distance (or NULL) associated with a particular
- /// level.
- virtual const SCEV *getDistance(unsigned Level) const { return nullptr; }
+ /// getDVEntry - Returns the DV entry associated with a regular or a
+ /// SID level
+ DVEntry getDVEntry(unsigned Level, bool SID) const;
+
+ /// getDirection - Returns the direction associated with a particular
+ /// common or SID level.
+ virtual unsigned getDirection(unsigned Level, bool SID = false) const {
+ return DVEntry::ALL;
+ }
+
+ /// getDistance - Returns the distance (or NULL) associated with a
+ /// particular common or SID level.
+ virtual const SCEV *getDistance(unsigned Level, bool SID = false) const {
+ return nullptr;
+ }
/// Check if the direction vector is negative. A negative direction
/// vector means Src and Dst are reversed in the actual program.
@@ -162,21 +185,32 @@ class LLVM_ABI Dependence {
virtual bool normalize(ScalarEvolution *SE) { return false; }
/// isPeelFirst - Returns true if peeling the first iteration from
- /// this loop will break this dependence.
- virtual bool isPeelFirst(unsigned Level) const { return false; }
+ /// this regular or SID loop level will break this dependence.
+ virtual bool isPeelFirst(unsigned Level, bool SID = false) const {
+ return false;
+ }
/// isPeelLast - Returns true if peeling the last iteration from
- /// this loop will break this dependence.
- virtual bool isPeelLast(unsigned Level) const { return false; }
+ /// this regular or SID loop level will break this dependence.
+ virtual bool isPeelLast(unsigned Level, bool SID = false) const {
+ return false;
+ }
- /// isSplitable - Returns true if splitting this loop will break the
- /// dependence.
- virtual bool isSplitable(unsigned Level) const { return false; }
+ /// isSplitable - Returns true if splitting the loop will break
+ /// the dependence.
+ virtual bool isSplitable(unsigned Level, bool SID = false) const {
+ return false;
+ }
- /// isScalar - Returns true if a particular level is scalar; that is,
- /// if no subscript in the source or destination mention the induction
- /// variable associated with the loop at this level.
- virtual bool isScalar(unsigned Level) const;
+ /// inSIDLoops - Returns true if this level is an SID level, i.e.,
+ /// performed across two separate loop nests that have the Same Iteration and
+ /// Depth.
+ virtual bool inSIDLoops(unsigned Level) const { return false; }
+
+ /// isScalar - Returns true if a particular regular or SID level is
+ /// scalar; that is, if no subscript in the source or destination mention
+ /// the induction variable associated with the loop at this level.
+ virtual bool isScalar(unsigned Level, bool SID = false) const;
/// getNextPredecessor - Returns the value of the NextPredecessor field.
const Dependence *getNextPredecessor() const { return NextPredecessor; }
@@ -198,6 +232,10 @@ class LLVM_ABI Dependence {
/// dump - For debugging purposes, dumps a dependence to OS.
void dump(raw_ostream &OS) const;
+ /// dumpImp - For debugging purposes. Dumps a dependence to OS with or
+ /// without considering the SID levels.
+ void dumpImp(raw_ostream &OS, bool SID = false) const;
+
protected:
Instruction *Src, *Dst;
@@ -238,13 +276,30 @@ class LLVM_ABI FullDependence final : public Dependence {
/// source and destination of the dependence.
unsigned getLevels() const override { return Levels; }
+ /// getSIDStrictLevels - Returns the number of separate SID loops surrounding
+ /// the source and destination of the dependence.
+ unsigned getSIDStrictLevels() const override { return SIDStrictLevels; }
+
+ /// getDVEntry - Returns the DV entry stored for \p Level. With \p SID unset
+ /// the level must be a regular level in [1, Levels]; with \p SID set it must
+ /// be an SID level in [Levels + 1, Levels + SIDStrictLevels].
+ DVEntry getDVEntry(unsigned Level, bool SID) const {
+   if (SID) {
+     assert(Levels < Level && Level <= Levels + SIDStrictLevels &&
+            "SID level out of range");
+     return DVSID[Level - Levels - 1];
+   }
+   assert(0 < Level && Level <= Levels && "Level out of range");
+   return DV[Level - 1];
+ }
+
/// getDirection - Returns the direction associated with a particular
- /// level.
- unsigned getDirection(unsigned Level) const override;
+ /// common or SID level.
+ unsigned getDirection(unsigned Level, bool SID = false) const override;
/// getDistance - Returns the distance (or NULL) associated with a
- /// particular level.
- const SCEV *getDistance(unsigned Level) const override;
+ /// particular common or SID level.
+ const SCEV *getDistance(unsigned Level, bool SID = false) const override;
/// Check if the direction vector is negative. A negative direction
/// vector means Src and Dst are reversed in the actual program.
@@ -257,27 +312,34 @@ class LLVM_ABI FullDependence final : public Dependence {
bool normalize(ScalarEvolution *SE) override;
/// isPeelFirst - Returns true if peeling the first iteration from
- /// this loop will break this dependence.
- bool isPeelFirst(unsigned Level) const override;
+ /// this regular or SID loop level will break this dependence.
+ bool isPeelFirst(unsigned Level, bool SID = false) const override;
/// isPeelLast - Returns true if peeling the last iteration from
- /// this loop will break this dependence.
- bool isPeelLast(unsigned Level) const override;
+ /// this regular or SID loop level will break this dependence.
+ bool isPeelLast(unsigned Level, bool SID = false) const override;
/// isSplitable - Returns true if splitting the loop will break
/// the dependence.
- bool isSplitable(unsigned Level) const override;
+ bool isSplitable(unsigned Level, bool SID = false) const override;
+
+ /// inSIDLoops - Returns true if this level is an SID level, i.e.,
+ /// performed across two separate loop nests that have the Same Iteration and
+ /// Depth.
+ bool inSIDLoops(unsigned Level) const override;
- /// isScalar - Returns true if a particular level is scalar; that is,
- /// if no subscript in the source or destination mention the induction
- /// variable associated with the loop at this level.
- bool isScalar(unsigned Level) const override;
+ /// isScalar - Returns true if a particular regular or SID level is
+ /// scalar; that is, if no subscript in the source or destination mention
+ /// the induction variable associated with the loop at this level.
+ bool isScalar(unsigned Level, bool SID = false) const override;
private:
unsigned short Levels;
+ unsigned short SIDStrictLevels;
bool LoopIndependent;
bool Consistent; // Init to true, then refine.
std::unique_ptr<DVEntry[]> DV;
+ std::unique_ptr<DVEntry[]> DVSID; // DV entries on SID levels
friend class DependenceInfo;
};
@@ -406,7 +468,8 @@ class DependenceInfo {
const SCEV *A;
const SCEV *B;
const SCEV *C;
- const Loop *AssociatedLoop;
+ const Loop *AssociatedSrcLoop;
+ const Loop *AssociatedDstLoop;
public:
/// isEmpty - Return true if the constraint is of kind Empty.
@@ -450,19 +513,27 @@ class DependenceInfo {
/// Otherwise assert.
LLVM_ABI const SCEV *getD() const;
- /// getAssociatedLoop - Returns the loop associated with this constraint.
- LLVM_ABI const Loop *getAssociatedLoop() const;
+ /// getAssociatedSrcLoop - Returns the source loop associated with this
+ /// constraint.
+ LLVM_ABI const Loop *getAssociatedSrcLoop() const;
+
+ /// getAssociatedDstLoop - Returns the destination loop associated with
+ /// this constraint.
+ LLVM_ABI const Loop *getAssociatedDstLoop() const;
/// setPoint - Change a constraint to Point.
LLVM_ABI void setPoint(const SCEV *X, const SCEV *Y,
- const Loop *CurrentLoop);
+ const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop);
/// setLine - Change a constraint to Line.
LLVM_ABI void setLine(const SCEV *A, const SCEV *B, const SCEV *C,
- const Loop *CurrentLoop);
+ const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop);
/// setDistance - Change a constraint to Distance.
- LLVM_ABI void setDistance(const SCEV *D, const Loop *CurrentLoop);
+ LLVM_ABI void setDistance(const SCEV *D, const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop);
/// setEmpty - Change a constraint to Empty.
LLVM_ABI void setEmpty();
@@ -475,6 +546,9 @@ class DependenceInfo {
LLVM_ABI void dump(raw_ostream &OS) const;
};
+ /// Returns true if two loops have Same Iteration and Depth.
+ bool isSID(const Loop *SrcLoop, const Loop *DstLoop) const;
+
/// establishNestingLevels - Examines the loop nesting of the Src and Dst
/// instructions and establishes their shared loops. Sets the variables
/// CommonLevels, SrcLevels, and MaxLevels.
@@ -525,9 +599,21 @@ class DependenceInfo {
/// e - 5
/// f - 6
/// g - 7 = MaxLevels
+ /// SIDStrictLevels counts the number of levels after common levels that are
+ /// not common but have the same iteration count and depth. Internally this
+ /// is checked using isSID. Assume that in this code fragment, levels c and e
+ /// have the same iteration count and depth, but levels d and f do not.
+ /// Then SIDStrictLevels is set to 1.
+ /// In that case the level numbers for the previous code look like
+ /// a - 1
+ /// b - 2
+ /// c,e - 3 = CommonLevels
+ /// d - 4 = SrcLevels
+ /// f - 5
+ /// g - 6 = MaxLevels
void establishNestingLevels(const Instruction *Src, const Instruction *Dst);
- unsigned CommonLevels, SrcLevels, MaxLevels;
+ unsigned CommonLevels, SrcLevels, MaxLevels, SIDStrictLevels;
/// mapSrcLoop - Given one of the loops containing the source, return
/// its level index in our numbering scheme.
@@ -652,9 +738,9 @@ class DependenceInfo {
/// If there might be a dependence, returns false.
/// Sets appropriate direction and distance.
bool strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
- const SCEV *DstConst, const Loop *CurrentLoop,
- unsigned Level, FullDependence &Result,
- Constraint &NewConstraint) const;
+ const SCEV *DstConst, const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop, unsigned Level,
+ FullDependence &Result, Constraint &NewConstraint) const;
/// weakCrossingSIVtest - Tests the weak-crossing SIV subscript pair
/// (Src and Dst) for dependence.
@@ -667,9 +753,9 @@ class DependenceInfo {
/// Set consistent to false.
/// Marks the dependence as splitable.
bool weakCrossingSIVtest(const SCEV *SrcCoeff, const SCEV *SrcConst,
- const SCEV *DstConst, const Loop *CurrentLoop,
- unsigned Level, FullDependence &Result,
- Constraint &NewConstraint,
+ const SCEV *DstConst, const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop, unsigned Level,
+ FullDependence &Result, Constraint &NewConstraint,
const SCEV *&SplitIter) const;
/// ExactSIVtest - Tests the SIV subscript pair
@@ -683,8 +769,9 @@ class DependenceInfo {
/// Set consistent to false.
bool exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const SCEV *SrcConst, const SCEV *DstConst,
- const Loop *CurrentLoop, unsigned Level,
- FullDependence &Result, Constraint &NewConstraint) const;
+ const Loop *CurrentSrcLoop, const Loop *CurrentDstLoop,
+ unsigned Level, FullDependence &Result,
+ Constraint &NewConstraint) const;
/// weakZeroSrcSIVtest - Tests the weak-zero SIV subscript pair
/// (Src and Dst) for dependence.
@@ -697,8 +784,9 @@ class DependenceInfo {
/// Set consistent to false.
/// If loop peeling will break the dependence, mark appropriately.
bool weakZeroSrcSIVtest(const SCEV *DstCoeff, const SCEV *SrcConst,
- const SCEV *DstConst, const Loop *CurrentLoop,
- unsigned Level, FullDependence &Result,
+ const SCEV *DstConst, const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop, unsigned Level,
+ FullDependence &Result,
Constraint &NewConstraint) const;
/// weakZeroDstSIVtest - Tests the weak-zero SIV subscript pair
@@ -712,8 +800,9 @@ class DependenceInfo {
/// Set consistent to false.
/// If loop peeling will break the dependence, mark appropriately.
bool weakZeroDstSIVtest(const SCEV *SrcCoeff, const SCEV *SrcConst,
- const SCEV *DstConst, const Loop *CurrentLoop,
- unsigned Level, FullDependence &Result,
+ const SCEV *DstConst, const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop, unsigned Level,
+ FullDependence &Result,
Constraint &NewConstraint) const;
/// exactRDIVtest - Tests the RDIV subscript pair for dependence.
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 0f77a1410e83b..5d50e8cce0b69 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -104,6 +104,7 @@ STATISTIC(GCDindependence, "GCD independence");
STATISTIC(BanerjeeApplications, "Banerjee applications");
STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+STATISTIC(SIDLoopsCount, "Loops with Same Iteration count and Depth");
static cl::opt<bool>
Delinearize("da-delinearize", cl::init(true), cl::Hidden,
@@ -274,7 +275,7 @@ bool Dependence::isAnti() const {
// if no subscript in the source or destination mention the induction
// variable associated with the loop at this level.
// Leave this out of line, so it will serve as a virtual method anchor
-bool Dependence::isScalar(unsigned level) const { return false; }
+bool Dependence::isScalar(unsigned level, bool SID) const { return false; }
//===----------------------------------------------------------------------===//
// FullDependence methods
@@ -286,6 +287,7 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
: Dependence(Source, Destination, Assumes), Levels(CommonLevels),
LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
+ SIDStrictLevels = 0;
if (CommonLevels)
DV = std::make_unique<DVEntry[]>(CommonLevels);
}
@@ -347,44 +349,48 @@ bool FullDependence::normalize(ScalarEvolution *SE) {
// The rest are simple getters that hide the implementation.
-// getDirection - Returns the direction associated with a particular level.
-unsigned FullDependence::getDirection(unsigned Level) const {
- assert(0 < Level && Level <= Levels && "Level out of range");
- return DV[Level - 1].Direction;
+// getDirection - Returns the direction associated with a particular common or
+// SID level.
+unsigned FullDependence::getDirection(unsigned Level, bool SID) const {
+ return getDVEntry(Level, SID).Direction;
}
-// Returns the distance (or NULL) associated with a particular level.
-const SCEV *FullDependence::getDistance(unsigned Level) const {
- assert(0 < Level && Level <= Levels && "Level out of range");
- return DV[Level - 1].Distance;
+// Returns the distance (or NULL) associated with a particular common or
+// SID level.
+const SCEV *FullDependence::getDistance(unsigned Level, bool SID) const {
+ return getDVEntry(Level, SID).Distance;
}
-// Returns true if a particular level is scalar; that is,
-// if no subscript in the source or destination mention the induction
-// variable associated with the loop at this level.
-bool FullDependence::isScalar(unsigned Level) const {
- assert(0 < Level && Level <= Levels && "Level out of range");
- return DV[Level - 1].Scalar;
+// Returns true if a particular regular or SID level is scalar; that is,
+// if no subscript in the source or destination mention the induction variable
+// associated with the loop at this level.
+bool FullDependence::isScalar(unsigned Level, bool SID) const {
+ return getDVEntry(Level, SID).Scalar;
+}
+
+// Returns true if peeling the first iteration from this regular or SID
+// loop level will break this dependence.
+bool FullDependence::isPeelFirst(unsigned Level, bool SID) const {
+ return getDVEntry(Level, SID).PeelFirst;
}
-// Returns true if peeling the first iteration from this loop
-// will break this dependence.
-bool FullDependence::isPeelFirst(unsigned Level) const {
- assert(0 < Level && Level <= Levels && "Level out of range");
- return DV[Level - 1].PeelFirst;
+// Returns true if peeling the last iteration from this regular or SID
+// loop level will break this dependence.
+bool FullDependence::isPeelLast(unsigned Level, bool SID) const {
+ return getDVEntry(Level, SID).PeelLast;
}
-// Returns true if peeling the last iteration from this loop
-// will break this dependence.
-bool FullDependence::isPeelLast(unsigned Level) const {
- assert(0 < Level && Level <= Levels && "Level out of range");
- return DV[Level - 1].PeelLast;
+// Returns true if splitting the loop will break the dependence.
+bool FullDependence::isSplitable(unsigned Level, bool SID) const {
+ return getDVEntry(Level, SID).Splitable;
}
-// Returns true if splitting this loop will break the dependence.
-bool FullDependence::isSplitable(unsigned Level) const {
- assert(0 < Level && Level <= Levels && "Level out of range");
- return DV[Level - 1].Splitable;
+// inSIDLoops - Tells whether Level is an SID level, i.e. one formed by two
+// separate loop nests that have the Same Iteration and Depth.
+bool FullDependence::inSIDLoops(unsigned Level) const {
+  assert(0 < Level && Level <= Levels + SIDStrictLevels &&
+         "Level out of range");
+  return Levels < Level;
+}
//===----------------------------------------------------------------------===//
@@ -435,37 +441,50 @@ const SCEV *DependenceInfo::Constraint::getD() const {
return SE->getNegativeSCEV(C);
}
-// Returns the loop associated with this constraint.
-const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
+// Returns the source loop associated with this constraint.
+const Loop *DependenceInfo::Constraint::getAssociatedSrcLoop() const {
assert((Kind == Distance || Kind == Line || Kind == Point) &&
"Kind should be Distance, Line, or Point");
- return AssociatedLoop;
+ return AssociatedSrcLoop;
+}
+
+// Returns the destination loop associated with this constraint.
+const Loop *DependenceInfo::Constraint::getAssociatedDstLoop() const {
+ assert((Kind == Distance || Kind == Line || Kind == Point) &&
+ "Kind should be Distance, Line, or Point");
+ return AssociatedDstLoop;
}
void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
- const Loop *CurLoop) {
+ const Loop *CurSrcLoop,
+ const Loop *CurDstLoop) {
Kind = Point;
A = X;
B = Y;
- AssociatedLoop = CurLoop;
+ AssociatedSrcLoop = CurSrcLoop;
+ AssociatedDstLoop = CurDstLoop;
}
void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
- const SCEV *CC, const Loop *CurLoop) {
+ const SCEV *CC, const Loop *CurSrcLoop,
+ const Loop *CurDstLoop) {
Kind = Line;
A = AA;
B = BB;
C = CC;
- AssociatedLoop = CurLoop;
+ AssociatedSrcLoop = CurSrcLoop;
+ AssociatedDstLoop = CurDstLoop;
}
void DependenceInfo::Constraint::setDistance(const SCEV *D,
- const Loop *CurLoop) {
+ const Loop *CurSrcLoop,
+ const Loop *CurDstLoop) {
Kind = Distance;
A = SE->getOne(D->getType());
B = SE->getNegativeSCEV(A);
C = SE->getNegativeSCEV(D);
- AssociatedLoop = CurLoop;
+ AssociatedSrcLoop = CurSrcLoop;
+ AssociatedDstLoop = CurDstLoop;
}
void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
@@ -611,7 +630,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
return true;
}
if (const SCEVConstant *CUB = collectConstantUpperBound(
- X->getAssociatedLoop(), Prod1->getType())) {
+ X->getAssociatedSrcLoop(), Prod1->getType())) {
const APInt &UpperBound = CUB->getAPInt();
LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
@@ -621,7 +640,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
}
}
X->setPoint(SE->getConstant(Xq), SE->getConstant(Yq),
- X->getAssociatedLoop());
+ X->getAssociatedSrcLoop(), X->getAssociatedDstLoop());
++DeltaSuccesses;
return true;
}
@@ -655,7 +674,6 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
// For debugging purposes. Dumps a dependence to OS.
void Dependence::dump(raw_ostream &OS) const {
- bool Splitable = false;
if (isConfused())
OS << "confused";
else {
@@ -669,41 +687,12 @@ void Dependence::dump(raw_ostream &OS) const {
OS << "anti";
else if (isInput())
OS << "input";
- unsigned Levels = getLevels();
- OS << " [";
- for (unsigned II = 1; II <= Levels; ++II) {
- if (isSplitable(II))
- Splitable = true;
- if (isPeelFirst(II))
- OS << 'p';
- const SCEV *Distance = getDistance(II);
- if (Distance)
- OS << *Distance;
- else if (isScalar(II))
- OS << "S";
- else {
- unsigned Direction = getDirection(II);
- if (Direction == DVEntry::ALL)
- OS << "*";
- else {
- if (Direction & DVEntry::LT)
- OS << "<";
- if (Direction & DVEntry::EQ)
- OS << "=";
- if (Direction & DVEntry::GT)
- OS << ">";
- }
- }
- if (isPeelLast(II))
- OS << 'p';
- if (II < Levels)
- OS << " ";
+ dumpImp(OS);
+ unsigned SIDStrictLevels = getSIDStrictLevels();
+ if (SIDStrictLevels > 0) {
+ OS << "! / assuming " << SIDStrictLevels << " loop level(s) fused: ";
+ dumpImp(OS, true);
}
- if (isLoopIndependent())
- OS << "|<";
- OS << "]";
- if (Splitable)
- OS << " splitable";
}
OS << "!\n";
@@ -714,6 +703,54 @@ void Dependence::dump(raw_ostream &OS) const {
}
}
+// Debug helper: prints the per-level direction/distance entries of this
+// dependence to OS, enclosed in " [" ... "]". Only the regular levels are
+// printed unless SID is set, in which case the SID levels follow them.
+// Appends " splitable" when any printed level reports isSplitable.
+void Dependence::dumpImp(raw_ostream &OS, bool SID) const {
+  const unsigned CommonCount = getLevels();
+  const unsigned SIDCount = getSIDStrictLevels();
+  const unsigned LastLevel = SID ? CommonCount + SIDCount : CommonCount;
+  bool AnySplitable = false;
+  bool PastCommon = false;
+  OS << " [";
+  for (unsigned Lvl = 1; Lvl <= LastLevel; ++Lvl) {
+    // Sticky: once an SID level is seen, all later levels are queried as SID.
+    PastCommon = PastCommon || inSIDLoops(Lvl);
+    AnySplitable |= isSplitable(Lvl, PastCommon);
+    // A leading 'p' marks a level where peeling the first iteration helps.
+    if (isPeelFirst(Lvl, PastCommon))
+      OS << 'p';
+    // Prefer the distance, then the scalar marker, then the direction set.
+    if (const SCEV *Dist = getDistance(Lvl, PastCommon)) {
+      OS << *Dist;
+    } else if (isScalar(Lvl, PastCommon)) {
+      OS << "S";
+    } else {
+      const unsigned Dir = getDirection(Lvl, PastCommon);
+      if (Dir == DVEntry::ALL) {
+        OS << "*";
+      } else {
+        if (Dir & DVEntry::LT)
+          OS << "<";
+        if (Dir & DVEntry::EQ)
+          OS << "=";
+        if (Dir & DVEntry::GT)
+          OS << ">";
+      }
+    }
+    if (isPeelLast(Lvl, PastCommon))
+      OS << 'p';
+    if (Lvl != LastLevel)
+      OS << " ";
+  }
+  if (isLoopIndependent())
+    OS << "|<";
+  OS << "]";
+  if (AnySplitable)
+    OS << " splitable";
+}
+
// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
// underlaying objects. If LocA and LocB are known to not alias (for any reason:
// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
@@ -762,6 +799,32 @@ static bool isLoadOrStore(const Instruction *I) {
return false;
}
+// Returns true if the two loops have the Same Iteration count and Depth (SID):
+// they are the same loop, or they sit at the same depth, both have a latch,
+// and their loop-invariant backedge-taken counts are known to be equal.
+bool DependenceInfo::isSID(const Loop *SrcLoop,
+                           const Loop *DstLoop) const {
+  if (SrcLoop == DstLoop)
+    return true;
+
+  // Reject null loops before the first dereference below.
+  if (!SrcLoop || !DstLoop)
+    return false;
+
+  if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
+    return false;
+
+  if (!SrcLoop->getLoopLatch() || !DstLoop->getLoopLatch())
+    return false;
+
+  if (!SE->hasLoopInvariantBackedgeTakenCount(SrcLoop) ||
+      !SE->hasLoopInvariantBackedgeTakenCount(DstLoop))
+    return false;
+  const SCEV *SrcUB = SE->getBackedgeTakenCount(SrcLoop);
+  const SCEV *DstUB = SE->getBackedgeTakenCount(DstLoop);
+  return SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUB);
+}
+
// Examines the loop nesting of the Src and Dst
// instructions and establishes their shared loops. Sets the variables
// CommonLevels, SrcLevels, and MaxLevels.
@@ -812,6 +875,18 @@ static bool isLoadOrStore(const Instruction *I) {
// e - 5
// f - 6
// g - 7 = MaxLevels
+// SIDStrictLevels counts the number of levels after common levels that are
+// not common but have the same iteration count and depth. Internally this
+// is checked using isSID. Assume that in this code fragment, levels c and e
+// have the same iteration count and depth, but levels d and f do not.
+// Then SIDStrictLevels is set to 1.
+// In that case the level numbers for the previous code look like
+// a - 1
+// b - 2
+// c,e - 3 = CommonLevels
+// d - 4 = SrcLevels
+// f - 5
+// g - 6 = MaxLevels
void DependenceInfo::establishNestingLevels(const Instruction *Src,
const Instruction *Dst) {
const BasicBlock *SrcBlock = Src->getParent();
@@ -822,6 +897,7 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src,
const Loop *DstLoop = LI->getLoopFor(DstBlock);
SrcLevels = SrcLevel;
MaxLevels = SrcLevel + DstLevel;
+ SIDStrictLevels = 0;
while (SrcLevel > DstLevel) {
SrcLoop = SrcLoop->getParentLoop();
SrcLevel--;
@@ -830,7 +906,12 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src,
DstLoop = DstLoop->getParentLoop();
DstLevel--;
}
+
+ // Find the first common level and count the SID levels leading to it.
while (SrcLoop != DstLoop) {
+ SIDStrictLevels++;
+ if (!isSID(SrcLoop, DstLoop))
+ SIDStrictLevels = 0;
SrcLoop = SrcLoop->getParentLoop();
DstLoop = DstLoop->getParentLoop();
SrcLevel--;
@@ -1227,8 +1308,9 @@ bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
//
// Return true if dependence disproved.
bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
- const SCEV *DstConst, const Loop *CurLoop,
- unsigned Level, FullDependence &Result,
+ const SCEV *DstConst, const Loop *CurSrcLoop,
+ const Loop *CurDstLoop, unsigned Level,
+ FullDependence &Result,
Constraint &NewConstraint) const {
LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
@@ -1246,7 +1328,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
// check that |Delta| < iteration count
- if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ if (const SCEV *UpperBound =
+ collectUpperBound(CurSrcLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
const SCEV *AbsDelta =
@@ -1279,7 +1362,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
return true;
}
Result.DV[Level].Distance = SE->getConstant(Distance);
- NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+ NewConstraint.setDistance(SE->getConstant(Distance), CurSrcLoop,
+ CurDstLoop);
if (Distance.sgt(0))
Result.DV[Level].Direction &= Dependence::DVEntry::LT;
else if (Distance.slt(0))
@@ -1290,18 +1374,18 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
} else if (Delta->isZero()) {
// since 0/X == 0
Result.DV[Level].Distance = Delta;
- NewConstraint.setDistance(Delta, CurLoop);
+ NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop);
Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
++StrongSIVsuccesses;
} else {
if (Coeff->isOne()) {
LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
Result.DV[Level].Distance = Delta; // since X/1 == X
- NewConstraint.setDistance(Delta, CurLoop);
+ NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop);
} else {
Result.Consistent = false;
NewConstraint.setLine(Coeff, SE->getNegativeSCEV(Coeff),
- SE->getNegativeSCEV(Delta), CurLoop);
+ SE->getNegativeSCEV(Delta), CurSrcLoop, CurDstLoop);
}
// maybe we can get a useful direction
@@ -1359,8 +1443,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
// Return true if dependence disproved.
bool DependenceInfo::weakCrossingSIVtest(
const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
- const Loop *CurLoop, unsigned Level, FullDependence &Result,
- Constraint &NewConstraint, const SCEV *&SplitIter) const {
+ const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+ FullDependence &Result, Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
@@ -1371,7 +1456,7 @@ bool DependenceInfo::weakCrossingSIVtest(
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
- NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+ NewConstraint.setLine(Coeff, Coeff, Delta, CurSrcLoop, CurDstLoop);
if (Delta->isZero()) {
Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
@@ -1419,7 +1504,8 @@ bool DependenceInfo::weakCrossingSIVtest(
// We're certain that Delta > 0 and ConstCoeff > 0.
// Check Delta/(2*ConstCoeff) against upper loop bound
- if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ if (const SCEV *UpperBound =
+ collectUpperBound(CurSrcLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
const SCEV *ML =
@@ -1614,7 +1700,8 @@ inferDomainOfAffine(const APInt &A, const APInt &B,
// returns all the dependencies that exist between Dst and Src.
bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const SCEV *SrcConst, const SCEV *DstConst,
- const Loop *CurLoop, unsigned Level,
+ const Loop *CurSrcLoop,
+ const Loop *CurDstLoop, unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const {
LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
@@ -1629,7 +1716,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,
- CurLoop);
+ CurSrcLoop, CurDstLoop);
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
@@ -1655,7 +1742,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
std::optional<APInt> UM;
// UM is perhaps unavailable, let's check
if (const SCEVConstant *CUB =
- collectConstantUpperBound(CurLoop, Delta->getType())) {
+ collectConstantUpperBound(CurSrcLoop, Delta->getType())) {
UM = CUB->getAPInt();
LLVM_DEBUG(dbgs() << "\t UM = " << *UM << "\n");
}
@@ -1789,12 +1876,10 @@ static bool isRemainderZero(const SCEVConstant *Dividend,
// (see also weakZeroDstSIVtest)
//
// Return true if dependence disproved.
-bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
- const SCEV *SrcConst,
- const SCEV *DstConst,
- const Loop *CurLoop, unsigned Level,
- FullDependence &Result,
- Constraint &NewConstraint) const {
+bool DependenceInfo::weakZeroSrcSIVtest(
+ const SCEV *DstCoeff, const SCEV *SrcConst, const SCEV *DstConst,
+ const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+ FullDependence &Result, Constraint &NewConstraint) const {
// For the WeakSIV test, it's possible the loop isn't common to
// the Src and Dst loops. If it isn't, then there's no need to
// record a direction.
@@ -1808,7 +1893,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
- CurLoop);
+ CurSrcLoop, CurDstLoop);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
if (Level < CommonLevels) {
@@ -1829,7 +1914,8 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
// check that Delta/SrcCoeff < iteration count
// really check NewDelta < count*AbsCoeff
- if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ if (const SCEV *UpperBound =
+ collectUpperBound(CurSrcLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
@@ -1898,12 +1984,10 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
// (see also weakZeroSrcSIVtest)
//
// Return true if dependence disproved.
-bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
- const SCEV *SrcConst,
- const SCEV *DstConst,
- const Loop *CurLoop, unsigned Level,
- FullDependence &Result,
- Constraint &NewConstraint) const {
+bool DependenceInfo::weakZeroDstSIVtest(
+ const SCEV *SrcCoeff, const SCEV *SrcConst, const SCEV *DstConst,
+ const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+ FullDependence &Result, Constraint &NewConstraint) const {
// For the WeakSIV test, it's possible the loop isn't common to the
// Src and Dst loops. If it isn't, then there's no need to record a direction.
LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
@@ -1916,7 +2000,7 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
- CurLoop);
+ CurSrcLoop, CurDstLoop);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
if (Level < CommonLevels) {
@@ -1937,7 +2021,8 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
// check that Delta/SrcCoeff < iteration count
// really check NewDelta < count*AbsCoeff
- if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ if (const SCEV *UpperBound =
+ collectUpperBound(CurSrcLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
@@ -2253,42 +2338,46 @@ bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
const SCEV *DstConst = DstAddRec->getStart();
const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
- const Loop *CurLoop = SrcAddRec->getLoop();
- assert(CurLoop == DstAddRec->getLoop() &&
- "both loops in SIV should be same");
- Level = mapSrcLoop(CurLoop);
+ const Loop *CurSrcLoop = SrcAddRec->getLoop();
+ const Loop *CurDstLoop = DstAddRec->getLoop();
+ assert(isSID(CurSrcLoop, CurDstLoop) &&
+ "Loops in the SIV test should have the same iteration count and "
+ "depth");
+ Level = mapSrcLoop(CurSrcLoop);
bool disproven;
if (SrcCoeff == DstCoeff)
- disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, Level,
- Result, NewConstraint);
+ disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+ CurDstLoop, Level, Result, NewConstraint);
else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
- disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
- Level, Result, NewConstraint, SplitIter);
+ disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+ CurDstLoop, Level, Result, NewConstraint,
+ SplitIter);
else
- disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
- Level, Result, NewConstraint);
+ disproven =
+ exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurSrcLoop,
+ CurDstLoop, Level, Result, NewConstraint);
return disproven || gcdMIVtest(Src, Dst, Result) ||
- symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
- CurLoop);
+ symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurSrcLoop,
+ CurDstLoop);
}
if (SrcAddRec) {
const SCEV *SrcConst = SrcAddRec->getStart();
const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
const SCEV *DstConst = Dst;
- const Loop *CurLoop = SrcAddRec->getLoop();
- Level = mapSrcLoop(CurLoop);
- return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, Level,
- Result, NewConstraint) ||
+ const Loop *CurSrcLoop = SrcAddRec->getLoop();
+ Level = mapSrcLoop(CurSrcLoop);
+ return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+ CurSrcLoop, Level, Result, NewConstraint) ||
gcdMIVtest(Src, Dst, Result);
}
if (DstAddRec) {
const SCEV *DstConst = DstAddRec->getStart();
const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
const SCEV *SrcConst = Src;
- const Loop *CurLoop = DstAddRec->getLoop();
- Level = mapDstLoop(CurLoop);
- return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, CurLoop, Level,
- Result, NewConstraint) ||
+ const Loop *CurDstLoop = DstAddRec->getLoop();
+ Level = mapDstLoop(CurDstLoop);
+ return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, CurDstLoop,
+ CurDstLoop, Level, Result, NewConstraint) ||
gcdMIVtest(Src, Dst, Result);
}
llvm_unreachable("SIV test expected at least one AddRec");
@@ -3166,19 +3255,20 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
Constraint &CurConstraint,
bool &Consistent) {
- const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+ const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
- const SCEV *A_K = findCoefficient(Src, CurLoop);
+ const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
if (A_K->isZero())
return false;
const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
Src = SE->getMinusSCEV(Src, DA_K);
- Src = zeroCoefficient(Src, CurLoop);
+ Src = zeroCoefficient(Src, CurSrcLoop);
LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
- Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
+ Dst = addToCoefficient(Dst, CurDstLoop, SE->getNegativeSCEV(A_K));
LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
- if (!findCoefficient(Dst, CurLoop)->isZero())
+ if (!findCoefficient(Dst, CurDstLoop)->isZero())
Consistent = false;
return true;
}
@@ -3191,7 +3281,8 @@ bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
Constraint &CurConstraint,
bool &Consistent) {
- const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+ const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
const SCEV *A = CurConstraint.getA();
const SCEV *B = CurConstraint.getB();
const SCEV *C = CurConstraint.getC();
@@ -3208,10 +3299,10 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
APInt Charlie = Cconst->getAPInt();
APInt CdivB = Charlie.sdiv(Beta);
assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
- const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ const SCEV *AP_K = findCoefficient(Dst, CurDstLoop);
Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
- Dst = zeroCoefficient(Dst, CurLoop);
- if (!findCoefficient(Src, CurLoop)->isZero())
+ Dst = zeroCoefficient(Dst, CurDstLoop);
+ if (!findCoefficient(Src, CurSrcLoop)->isZero())
Consistent = false;
} else if (B->isZero()) {
const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
@@ -3222,10 +3313,10 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
APInt Charlie = Cconst->getAPInt();
APInt CdivA = Charlie.sdiv(Alpha);
assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
- const SCEV *A_K = findCoefficient(Src, CurLoop);
+ const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
- Src = zeroCoefficient(Src, CurLoop);
- if (!findCoefficient(Dst, CurLoop)->isZero())
+ Src = zeroCoefficient(Src, CurSrcLoop);
+ if (!findCoefficient(Dst, CurDstLoop)->isZero())
Consistent = false;
} else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
@@ -3236,21 +3327,21 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
APInt Charlie = Cconst->getAPInt();
APInt CdivA = Charlie.sdiv(Alpha);
assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
- const SCEV *A_K = findCoefficient(Src, CurLoop);
+ const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
- Src = zeroCoefficient(Src, CurLoop);
- Dst = addToCoefficient(Dst, CurLoop, A_K);
- if (!findCoefficient(Dst, CurLoop)->isZero())
+ Src = zeroCoefficient(Src, CurSrcLoop);
+ Dst = addToCoefficient(Dst, CurDstLoop, A_K);
+ if (!findCoefficient(Dst, CurDstLoop)->isZero())
Consistent = false;
} else {
// paper is incorrect here, or perhaps just misleading
- const SCEV *A_K = findCoefficient(Src, CurLoop);
+ const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
Src = SE->getMulExpr(Src, A);
Dst = SE->getMulExpr(Dst, A);
Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
- Src = zeroCoefficient(Src, CurLoop);
- Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
- if (!findCoefficient(Dst, CurLoop)->isZero())
+ Src = zeroCoefficient(Src, CurSrcLoop);
+ Dst = addToCoefficient(Dst, CurDstLoop, SE->getMulExpr(A_K, B));
+ if (!findCoefficient(Dst, CurDstLoop)->isZero())
Consistent = false;
}
LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
@@ -3263,17 +3354,18 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
// Return true if some simplification occurs.
bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
Constraint &CurConstraint) {
- const Loop *CurLoop = CurConstraint.getAssociatedLoop();
- const SCEV *A_K = findCoefficient(Src, CurLoop);
- const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+ const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
+ const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
+ const SCEV *AP_K = findCoefficient(Dst, CurDstLoop);
const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
- Src = zeroCoefficient(Src, CurLoop);
+ Src = zeroCoefficient(Src, CurSrcLoop);
LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
- Dst = zeroCoefficient(Dst, CurLoop);
+ Dst = zeroCoefficient(Dst, CurDstLoop);
LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
return true;
}
@@ -3702,14 +3794,6 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
}
}
- establishNestingLevels(Src, Dst);
- LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
- LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
-
- FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
- PossiblyLoopIndependent, CommonLevels);
- ++TotalArrayPairs;
-
unsigned Pairs = 1;
SmallVector<Subscript, 2> Pair(Pairs);
Pair[0].Src = SrcEv;
@@ -3722,6 +3806,44 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
}
}
+ // Establish loop nesting levels considering SID loops as common
+ establishNestingLevels(Src, Dst);
+
+ LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
+ LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
+ LLVM_DEBUG(dbgs() << " SID nesting levels = " << SIDStrictLevels
+ << "\n");
+
+ // Modify common levels to consider the SID levels in the tests
+ CommonLevels += SIDStrictLevels;
+ MaxLevels -= SIDStrictLevels;
+ if (SIDStrictLevels > 0) {
+ // Not all tests are handled over SID loops yet; stop treating the SID
+ // levels as common if any pair needs a test other than ZIV, SIV or RDIV
+ for (unsigned P = 0; P < Pairs; ++P) {
+ SmallBitVector Loops;
+ Subscript::ClassificationKind TestClass =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()), Loops);
+
+ if (TestClass != Subscript::ZIV && TestClass != Subscript::SIV &&
+ TestClass != Subscript::RDIV) {
+ // Revert the levels to not consider the SID levels
+ CommonLevels -= SIDStrictLevels;
+ MaxLevels += SIDStrictLevels;
+ SIDStrictLevels = 0;
+ break;
+ }
+ }
+ }
+
+ if (SIDStrictLevels > 0)
+ SIDLoopsCount++;
+
+ FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
+ PossiblyLoopIndependent, CommonLevels);
+ ++TotalArrayPairs;
+
for (unsigned P = 0; P < Pairs; ++P) {
assert(Pair[P].Src->getType()->isIntegerTy() && "Src must be an integer");
assert(Pair[P].Dst->getType()->isIntegerTy() && "Dst must be an integer");
@@ -4028,6 +4150,27 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
#endif
}
+ if (SIDStrictLevels > 0) {
+ // Extracting SID levels from the common levels
+ // Reverting CommonLevels and MaxLevels to their original values
+ assert(CommonLevels >= SIDStrictLevels);
+ CommonLevels -= SIDStrictLevels;
+ MaxLevels += SIDStrictLevels;
+ std::unique_ptr<FullDependence::DVEntry[]> DV, DVSID;
+ DV = std::make_unique<FullDependence::DVEntry[]>(CommonLevels);
+ DVSID = std::make_unique<FullDependence::DVEntry[]>(SIDStrictLevels);
+ for (unsigned Level = 0; Level < CommonLevels; ++Level)
+ DV[Level] = Result.DV[Level];
+ for (unsigned Level = 0; Level < SIDStrictLevels; ++Level)
+ DVSID[Level] = Result.DV[CommonLevels + Level];
+ Result.DV = std::move(DV);
+ Result.DVSID = std::move(DVSID);
+ Result.Levels = CommonLevels;
+ Result.SIDStrictLevels = SIDStrictLevels;
+ // Result is not consistent if it considers SID levels
+ Result.Consistent = false;
+ }
+
if (PossiblyLoopIndependent) {
// Make sure the LoopIndependent flag is set correctly.
// All directions must include equal, otherwise no
diff --git a/llvm/test/Analysis/DependenceAnalysis/SIDLoops.ll b/llvm/test/Analysis/DependenceAnalysis/SIDLoops.ll
new file mode 100644
index 0000000000000..cce0d9670506c
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/SIDLoops.ll
@@ -0,0 +1,262 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN: -da-disable-delinearization-checks | FileCheck %s
+
+
+;; for (long int i = 0; i < 10; i++) {
+;; for (long int j = 0; j < 10; j++) {
+;; for (long int k = 0; k < 10; k++) {
+;; for (long int l = 0; l < 10; l++)
+;; A[i][j][k][l] = i;
+;; }
+;; for (long int k = 1; k < 11; k++) {
+;; for (long int l = 0; l < 10; l++)
+;; A[i + 4][j + 3][k + 2][l + 1] = l;
+
+define void @sid0(ptr %A) nounwind uwtable ssp {
+; CHECK-LABEL: 'sid0'
+; CHECK-NEXT: Src: store i64 %i.013, ptr %arrayidx12, align 8 --> Dst: store i64 %i.013, ptr %arrayidx12, align 8
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i64 %i.013, ptr %arrayidx12, align 8 --> Dst: store i64 %l17.04, ptr %arrayidx24, align 8
+; CHECK-NEXT: da analyze - output [-4 -3]! / assuming 2 loop level(s) fused: [-4 -3 -3 -1]!
+; CHECK-NEXT: Src: store i64 %l17.04, ptr %arrayidx24, align 8 --> Dst: store i64 %l17.04, ptr %arrayidx24, align 8
+; CHECK-NEXT: da analyze - none!
+;
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc33
+ %i.013 = phi i64 [ %inc34, %for.inc33 ], [ 0, %entry ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc30
+ %j.09 = phi i64 [ %inc31, %for.inc30 ], [ 0, %for.cond1.preheader ]
+ br label %for.cond7.preheader
+
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc12
+ %k.07 = phi i64 [ %inc13, %for.inc12 ], [ 0, %for.cond4.preheader ]
+ br label %for.body9
+
+for.body9: ; preds = %for.cond7.preheader, %for.body9
+ %l.02 = phi i64 [ %inc11, %for.body9 ], [ 0, %for.cond7.preheader ]
+ %arrayidx12 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %i.013, i64 %j.09, i64 %k.07, i64 %l.02
+ store i64 %i.013, ptr %arrayidx12, align 8
+ %inc11 = add nsw i64 %l.02, 1
+ %exitcond15 = icmp ne i64 %inc11, 10
+ br i1 %exitcond15, label %for.body9, label %for.inc12
+
+for.inc12: ; preds = %for.body9
+ %inc13 = add nsw i64 %k.07, 1
+ %exitcond16 = icmp ne i64 %inc13, 10
+ br i1 %exitcond16, label %for.cond7.preheader, label %for.cond18.preheader
+
+for.cond18.preheader: ; preds = %for.inc12, %for.inc27
+ %k14.06 = phi i64 [ %inc28, %for.inc27 ], [ 1, %for.inc12 ]
+ br label %for.body20
+
+for.body20: ; preds = %for.cond18.preheader, %for.body20
+ %l17.04 = phi i64 [ %inc25, %for.body20 ], [ 0, %for.cond18.preheader ]
+ %add = add nsw i64 %l17.04, 1
+ %add21 = add nsw i64 %k14.06, 2
+ %add22 = add nsw i64 %j.09, 3
+ %add23 = add nsw i64 %i.013, 4
+ %arrayidx24 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %add23, i64 %add22, i64 %add21, i64 %add
+ store i64 %l17.04, ptr %arrayidx24, align 8
+ %inc25 = add nsw i64 %l17.04, 1
+ %exitcond = icmp ne i64 %inc25, 10
+ br i1 %exitcond, label %for.body20, label %for.inc27
+
+for.inc27: ; preds = %for.body20
+ %inc28 = add nsw i64 %k14.06, 1
+ %exitcond17 = icmp ne i64 %inc28, 11
+ br i1 %exitcond17, label %for.cond18.preheader, label %for.inc30
+
+for.inc30: ; preds = %for.inc27
+ %inc31 = add nsw i64 %j.09, 1
+ %exitcond18 = icmp ne i64 %inc31, 10
+ br i1 %exitcond18, label %for.cond4.preheader, label %for.inc33
+
+for.inc33: ; preds = %for.inc30
+ %inc34 = add nsw i64 %i.013, 1
+ %exitcond19 = icmp ne i64 %inc34, 10
+ br i1 %exitcond19, label %for.cond1.preheader, label %for.end35
+
+for.end35: ; preds = %for.inc33
+ ret void
+}
+
+
+;; for (long int i = 0; i < 10; i++)
+;; A[4*i + 2] = i;
+;; for (long int j = 0; j < 10; j++)
+;; tmp = A[j + 2];
+
+define void @sid1(ptr %A) nounwind uwtable ssp {
+; CHECK-LABEL: 'sid1'
+; CHECK-NEXT: Src: store i64 %i.03, ptr %arrayidx, align 4 --> Dst: store i64 %i.03, ptr %arrayidx, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i64 %i.03, ptr %arrayidx, align 4 --> Dst: %0 = load i64, ptr %arrayidx7, align 4
+; CHECK-NEXT: da analyze - flow [|<]! / assuming 1 loop level(s) fused: [<=|<]!
+; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx7, align 4 --> Dst: %0 = load i64, ptr %arrayidx7, align 4
+; CHECK-NEXT: da analyze - none!
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %mul = shl nsw i64 %i.03, 2
+ %add = add nsw i64 %mul, 2
+
+ %arrayidx = getelementptr inbounds i64, ptr %A, i64 %add
+ store i64 %i.03, ptr %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc, 10
+ br i1 %exitcond5, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %add64 = add nsw i64 %j.02, 2
+ %arrayidx7 = getelementptr inbounds i64, ptr %A, i64 %add64
+ %0 = load i64, ptr %arrayidx7, align 4
+ %inc9 = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc9, 10
+ br i1 %exitcond, label %for.body4, label %for.end10
+
+for.end10: ; preds = %for.body4
+ ret void
+}
+
+
+;; No information for SID loops is generated as MIV test is not supported
+;; on separate loops yet.
+;; for (long int i = 0; i < 10; i++) {
+;; for (long int j = 0; j < 10; j++) {
+;; for (long int k = 0; k < 10; k++) {
+;; for (long int l = 0; l < 10; l++)
+;; A[i][j][k][l] = i;
+;; }
+;; for (long int k = 1; k < 11; k++) {
+;; for (long int l = 0; l < 10; l++)
+;; A[i + 4][j + 3][k + 2][k + l] = l;
+
+define void @non_sid0(ptr %A) nounwind uwtable ssp {
+; CHECK-LABEL: 'non_sid0'
+; CHECK-NEXT: Src: store i64 %i.013, ptr %arrayidx12, align 8 --> Dst: store i64 %i.013, ptr %arrayidx12, align 8
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i64 %i.013, ptr %arrayidx12, align 8 --> Dst: store i64 %l17.04, ptr %arrayidx24, align 8
+; CHECK-NEXT: da analyze - output [-4 -3]!{{$}}
+; CHECK-NEXT: Src: store i64 %l17.04, ptr %arrayidx24, align 8 --> Dst: store i64 %l17.04, ptr %arrayidx24, align 8
+; CHECK-NEXT: da analyze - none!
+;
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc33
+ %i.013 = phi i64 [ %inc34, %for.inc33 ], [ 0, %entry ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc30
+ %j.09 = phi i64 [ %inc31, %for.inc30 ], [ 0, %for.cond1.preheader ]
+ br label %for.cond7.preheader
+
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc12
+ %k.07 = phi i64 [ %inc13, %for.inc12 ], [ 0, %for.cond4.preheader ]
+ br label %for.body9
+
+for.body9: ; preds = %for.cond7.preheader, %for.body9
+ %l.02 = phi i64 [ %inc11, %for.body9 ], [ 0, %for.cond7.preheader ]
+ %arrayidx12 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %i.013, i64 %j.09, i64 %k.07, i64 %l.02
+ store i64 %i.013, ptr %arrayidx12, align 8
+ %inc11 = add nsw i64 %l.02, 1
+ %exitcond15 = icmp ne i64 %inc11, 10
+ br i1 %exitcond15, label %for.body9, label %for.inc12
+
+for.inc12: ; preds = %for.body9
+ %inc13 = add nsw i64 %k.07, 1
+ %exitcond16 = icmp ne i64 %inc13, 10
+ br i1 %exitcond16, label %for.cond7.preheader, label %for.cond18.preheader
+
+for.cond18.preheader: ; preds = %for.inc12, %for.inc27
+ %k14.06 = phi i64 [ %inc28, %for.inc27 ], [ 1, %for.inc12 ]
+ br label %for.body20
+
+for.body20: ; preds = %for.cond18.preheader, %for.body20
+ %l17.04 = phi i64 [ %inc25, %for.body20 ], [ 0, %for.cond18.preheader ]
+ %add = add nsw i64 %k14.06, %l17.04
+ %add21 = add nsw i64 %k14.06, 2
+ %add22 = add nsw i64 %j.09, 3
+ %add23 = add nsw i64 %i.013, 4
+ %arrayidx24 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %add23, i64 %add22, i64 %add21, i64 %add
+ store i64 %l17.04, ptr %arrayidx24, align 8
+ %inc25 = add nsw i64 %l17.04, 1
+ %exitcond = icmp ne i64 %inc25, 10
+ br i1 %exitcond, label %for.body20, label %for.inc27
+
+for.inc27: ; preds = %for.body20
+ %inc28 = add nsw i64 %k14.06, 1
+ %exitcond17 = icmp ne i64 %inc28, 11
+ br i1 %exitcond17, label %for.cond18.preheader, label %for.inc30
+
+for.inc30: ; preds = %for.inc27
+ %inc31 = add nsw i64 %j.09, 1
+ %exitcond18 = icmp ne i64 %inc31, 10
+ br i1 %exitcond18, label %for.cond4.preheader, label %for.inc33
+
+for.inc33: ; preds = %for.inc30
+ %inc34 = add nsw i64 %i.013, 1
+ %exitcond19 = icmp ne i64 %inc34, 10
+ br i1 %exitcond19, label %for.cond1.preheader, label %for.end35
+
+for.end35: ; preds = %for.inc33
+ ret void
+}
+
+
+;; Loops with different tripcounts are not considered as SID
+;; for (long int i = 0; i < 10; i++)
+;; A[4*i + 2] = i;
+;; for (long int j = 0; j < 11; j++)
+;; tmp = A[j + 2];
+
+define void @non_sid1(ptr %A) nounwind uwtable ssp {
+; CHECK-LABEL: 'non_sid1'
+; CHECK-NEXT: Src: store i64 %i.03, ptr %arrayidx, align 4 --> Dst: store i64 %i.03, ptr %arrayidx, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i64 %i.03, ptr %arrayidx, align 4 --> Dst: %0 = load i64, ptr %arrayidx7, align 4
+; CHECK-NEXT: da analyze - flow [|<]!{{$}}
+; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx7, align 4 --> Dst: %0 = load i64, ptr %arrayidx7, align 4
+; CHECK-NEXT: da analyze - none!
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %mul = shl nsw i64 %i.03, 2
+ %add = add nsw i64 %mul, 2
+
+ %arrayidx = getelementptr inbounds i64, ptr %A, i64 %add
+ store i64 %i.03, ptr %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc, 10
+ br i1 %exitcond5, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %add64 = add nsw i64 %j.02, 2
+ %arrayidx7 = getelementptr inbounds i64, ptr %A, i64 %add64
+ %0 = load i64, ptr %arrayidx7, align 4
+ %inc9 = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc9, 11
+ br i1 %exitcond, label %for.body4, label %for.end10
+
+for.end10: ; preds = %for.body4
+ ret void
+}
\ No newline at end of file
>From 9fbd0fc7f925d693f0ab72dcee2db5ae6cbba2e9 Mon Sep 17 00:00:00 2001
From: a00917109 <alireza.torabian at huawei.com>
Date: Thu, 18 Sep 2025 18:46:06 -0400
Subject: [PATCH 2/2] [LoopFusion] Detecting loop-carried dependencies using DA
info
The loop fusion pass will use the information provided by DA to
detect loop-carried dependencies and fuse the loops if it is legal.
---
llvm/lib/Transforms/Scalar/LoopFuse.cpp | 29 +++
.../LoopFusion/backward_loop_carried.ll | 185 ++++++++++++++++++
llvm/test/Transforms/LoopFusion/simple.ll | 41 ++--
3 files changed, 239 insertions(+), 16 deletions(-)
create mode 100644 llvm/test/Transforms/LoopFusion/backward_loop_carried.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index b5eb647a042b9..4c98373df0d9c 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -100,6 +100,8 @@ STATISTIC(OnlySecondCandidateIsGuarded,
"The second candidate is guarded while the first one is not");
STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
+STATISTIC(NumDepSafeFused, "Number of fused loops with dependencies "
+ "proven safe based on the dependence direction");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -1371,6 +1373,33 @@ struct LoopFuser {
<< "\n");
}
#endif
+ unsigned Levels = DepResult->getLevels();
+ unsigned SIDLevels = DepResult->getSIDStrictLevels();
+ unsigned CurLoopLevel = FC0.L->getLoopDepth();
+
+ bool OuterEqDir = true;
+ for (unsigned II = 1; II <= std::min(CurLoopLevel - 1, Levels); ++II) {
+ unsigned Direction = DepResult->getDirection(II, II > Levels);
+ if (!(Direction & Dependence::DVEntry::EQ)) {
+ // Different accesses in the outer levels of CurLoopLevel
+ OuterEqDir = false;
+ break;
+ }
+ }
+ if (!OuterEqDir || CurLoopLevel > Levels + SIDLevels) {
+ LLVM_DEBUG(dbgs() << "Safe to fuse with no dependency\n");
+ NumDepSafeFused++;
+ return true;
+ }
+
+ assert(CurLoopLevel > Levels && "Fusion candidates are not separated");
+ unsigned CurDir = DepResult->getDirection(CurLoopLevel, true);
+ if (!(CurDir & Dependence::DVEntry::GT)) {
+ LLVM_DEBUG(dbgs() << "Safe to fuse with backward loop-carried "
+ "dependency\n");
+ NumDepSafeFused++;
+ return true;
+ }
if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
LLVM_DEBUG(
diff --git a/llvm/test/Transforms/LoopFusion/backward_loop_carried.ll b/llvm/test/Transforms/LoopFusion/backward_loop_carried.ll
new file mode 100644
index 0000000000000..30c683d147d4e
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/backward_loop_carried.ll
@@ -0,0 +1,185 @@
+; RUN: opt -S -passes=loop-fusion -da-disable-delinearization-checks < %s | FileCheck %s
+
+; The two inner loops can be fused: within one iteration of the outer loops,
+; their accesses never touch the same element, so there is no dependency.
+
+; C Code
+;
+;; for (long int i = 0; i < n; i++) {
+;; for (long int j = 0; j < n; j++) {
+;; for (long int k = 0; k < n; k++) {
+;; A[i][j][k] = i;
+;; }
+;; for (long int k = 0; k < n; k++) {
+;; temp = A[i + 3][j + 2][k + 1];
+
+define void @backward_dep0(i64 %n, ptr %A) nounwind uwtable ssp {
+entry:
+ %cmp10 = icmp sgt i64 %n, 0
+ br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+
+; CHECK-LABEL: backward_dep0
+; CHECK-COUNT-1: for.body{{[0-9]+}}:
+; CHECK-NOT: for.body{{[0-9]+}}:
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
+ %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
+ %cmp26 = icmp sgt i64 %n, 0
+ br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
+ %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
+ %cmp51 = icmp sgt i64 %n, 0
+ br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader: ; preds = %for.cond4.preheader
+ br label %for.body6
+
+for.body6: ; preds = %for.body6.preheader, %for.body6
+ %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
+ %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
+ store i64 %i.011, ptr %arrayidx8, align 8
+ %inc = add nsw i64 %k.02, 1
+ %exitcond13 = icmp ne i64 %inc, %n
+ br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit: ; preds = %for.body6
+ br label %for.cond10.loopexit
+
+for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
+ %cmp113 = icmp sgt i64 %n, 0
+ br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader: ; preds = %for.cond10.loopexit
+ br label %for.body12
+
+for.body12: ; preds = %for.body12.preheader, %for.body12
+ %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
+ %add = add nsw i64 %k9.05, 1
+ %add13 = add nsw i64 %j.07, 2
+ %add14 = add nsw i64 %i.011, 3
+ %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
+ %0 = load i64, ptr %arrayidx17, align 8
+ %inc19 = add nsw i64 %k9.05, 1
+ %exitcond = icmp ne i64 %inc19, %n
+ br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
+
+for.inc21.loopexit: ; preds = %for.body12
+ br label %for.inc21
+
+for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+ %inc22 = add nsw i64 %j.07, 1
+ %exitcond14 = icmp ne i64 %inc22, %n
+ br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit: ; preds = %for.inc21
+ br label %for.inc24
+
+for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
+ %inc25 = add nsw i64 %i.011, 1
+ %exitcond15 = icmp ne i64 %inc25, %n
+ br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit: ; preds = %for.inc24
+ br label %for.end26
+
+for.end26: ; preds = %for.end26.loopexit, %entry
+ ret void
+}
+
+; The two inner loops have a backward loop-carried dependency, allowing them
+; to be fused.
+
+; C Code
+;
+;; for (long int i = 0; i < n; i++) {
+;; for (long int j = 0; j < n; j++) {
+;; for (long int k = 0; k < n; k++) {
+;; A[i][j][k] = i;
+;; }
+;; for (long int k = 0; k < n; k++) {
+;; temp = A[i][j][k - 1];
+
+define void @backward_dep1(i64 %n, ptr %A) nounwind uwtable ssp {
+entry:
+ %cmp10 = icmp sgt i64 %n, 0
+ br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+
+; CHECK-LABEL: backward_dep1
+; CHECK-COUNT-1: for.body{{[0-9]+}}:
+; CHECK-NOT: for.body{{[0-9]+}}:
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
+ %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
+ %cmp26 = icmp sgt i64 %n, 0
+ br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
+ %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
+ %cmp51 = icmp sgt i64 %n, 0
+ br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader: ; preds = %for.cond4.preheader
+ br label %for.body6
+
+for.body6: ; preds = %for.body6.preheader, %for.body6
+ %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
+ %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
+ store i64 %i.011, ptr %arrayidx8, align 8
+ %inc = add nsw i64 %k.02, 1
+ %exitcond13 = icmp ne i64 %inc, %n
+ br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit: ; preds = %for.body6
+ br label %for.cond10.loopexit
+
+for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
+ %cmp113 = icmp sgt i64 %n, 0
+ br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader: ; preds = %for.cond10.loopexit
+ br label %for.body12
+
+for.body12: ; preds = %for.body12.preheader, %for.body12
+ %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
+ %add = add nsw i64 %k9.05, -1
+ %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add
+ %0 = load i64, ptr %arrayidx17, align 8
+ %inc19 = add nsw i64 %k9.05, 1
+ %exitcond = icmp ne i64 %inc19, %n
+ br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
+
+for.inc21.loopexit: ; preds = %for.body12
+ br label %for.inc21
+
+for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+ %inc22 = add nsw i64 %j.07, 1
+ %exitcond14 = icmp ne i64 %inc22, %n
+ br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit: ; preds = %for.inc21
+ br label %for.inc24
+
+for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
+ %inc25 = add nsw i64 %i.011, 1
+ %exitcond15 = icmp ne i64 %inc25, %n
+ br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit: ; preds = %for.inc24
+ br label %for.end26
+
+for.end26: ; preds = %for.end26.loopexit, %entry
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll
index d63890df14461..54556eb98ad80 100644
--- a/llvm/test/Transforms/LoopFusion/simple.ll
+++ b/llvm/test/Transforms/LoopFusion/simple.ll
@@ -298,42 +298,51 @@ bb23: ; preds = %bb17, %bb
ret void
}
+; void forward_dep(int *arg) {
+; for (int i = 0; i < 100; i++) {
+; int tmp = i - 3;
+; int val = tmp * (i + 3) % i;
+; arg[i] = val;
+; }
+;
+; for (int j = 0; j < 100; j++) {
+; int val = arg[j - 3];
+; arg[j] = val * 3;
+; }
+; }
+;
define void @forward_dep(ptr noalias %arg) {
; CHECK-LABEL: @forward_dep(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: br label [[BB7:%.*]]
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: br label %[[BB7:.*]]
; CHECK: bb7:
-; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ]
-; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ]
+; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP15:%.*]], %[[BB25:.*]] ]
+; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], %[[BB25]] ]
+; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB25]] ], [ 0, %[[BB]] ]
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[DOT013]], -3
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV22]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV22]]
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4
-; CHECK-NEXT: br label [[BB14]]
+; CHECK-NEXT: br label %[[BB14:.*]]
; CHECK: bb14:
-; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
-; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
-; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
-; CHECK-NEXT: br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]]
-; CHECK: bb19.preheader:
-; CHECK-NEXT: br label [[BB19:%.*]]
-; CHECK: bb19:
-; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ]
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i32 [[TMP22]], 3
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV1]]
; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4
-; CHECK-NEXT: br label [[BB25]]
+; CHECK-NEXT: br label %[[BB25]]
; CHECK: bb25:
+; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
+; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
+; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB7]], label %[[BB26:.*]]
; CHECK: bb26:
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list