[llvm] edf7bed - [SLP][NFC] Outline lookahead heuristics into a separate helper class.
Valery N Dmitriev via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 22 18:59:57 PDT 2022
Author: Valery N Dmitriev
Date: 2022-04-22T18:59:08-07:00
New Revision: edf7bed87b77262abca0349f0f31f7e5092a5259
URL: https://github.com/llvm/llvm-project/commit/edf7bed87b77262abca0349f0f31f7e5092a5259
DIFF: https://github.com/llvm/llvm-project/commit/edf7bed87b77262abca0349f0f31f7e5092a5259.diff
LOG: [SLP][NFC] Outline lookahead heuristics into a separate helper class.
Minor refactoring to reduce the size of the functional change D124309:
the look-ahead scoring routines are pulled out of VLOperands into a
new LookAheadHeuristics helper class.
Reviewed By: Alexey Bataev (ABataev), Vasileios Porpodas (vporpo)
Differential Revision: https://reviews.llvm.org/D124313
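For readers skimming the patch, here is a minimal, self-contained sketch of the resulting shape: a small scoring helper that carries only the context it needs (injected through its constructor and built per query), with the operand-reordering class constructing one inside getLookAheadScore. The types, score constants, and address-based scoring below are simplified stand-ins for illustration, not the real LLVM classes in SLPVectorizer.cpp.

// Simplified sketch, not the actual LLVM code.
#include <cstdlib>
#include <iostream>

// Hypothetical stand-in for llvm::Value: just an id and a memory address.
struct Value {
  int Id;
  int Addr;
};

// Mirrors the shape of the new helper: all the context it needs is injected
// through the constructor, so scoring no longer depends on VLOperands state.
class LookAheadHeuristics {
  int NumLanes; // Total number of lanes (aka vectorization factor).
  int MaxLevel; // The maximum recursion depth for accumulating score.

public:
  // Illustrative score constants; higher means a better match.
  static constexpr int ScoreFail = 0;
  static constexpr int ScoreSplat = 1;
  static constexpr int ScoreConsecutive = 4;

  LookAheadHeuristics(int NumLanes, int MaxLevel)
      : NumLanes(NumLanes), MaxLevel(MaxLevel) {}

  // Single-level score for pairing V1 with V2 in neighboring lanes.
  int getShallowScore(const Value &V1, const Value &V2) const {
    if (V1.Id == V2.Id)
      return ScoreSplat;
    int Dist = V2.Addr - V1.Addr;
    if (Dist != 0 && std::abs(Dist) <= NumLanes / 2)
      return ScoreConsecutive;
    return ScoreFail;
  }

  // Recurse up to MaxLevel, accumulating the per-level scores.
  int getScoreAtLevelRec(const Value &LHS, const Value &RHS,
                         int CurrLevel) const {
    int Score = getShallowScore(LHS, RHS);
    if (CurrLevel == MaxLevel || Score == ScoreFail)
      return Score;
    // The real implementation pairs up the operands of LHS/RHS here and adds
    // the best per-operand score; elided in this sketch.
    return Score;
  }
};

// Caller side after the patch: the helper is built on demand.
class VLOperands {
  int NumLanes = 4;
  static constexpr int LookAheadMaxDepth = 2;

public:
  int getLookAheadScore(const Value &LHS, const Value &RHS) const {
    LookAheadHeuristics LookAhead(NumLanes, LookAheadMaxDepth);
    return LookAhead.getScoreAtLevelRec(LHS, RHS, /*CurrLevel=*/1);
  }
};

int main() {
  VLOperands Ops;
  Value A{/*Id=*/1, /*Addr=*/100};
  Value B{/*Id=*/2, /*Addr=*/101};
  std::cout << Ops.getLookAheadScore(A, B) << '\n'; // Adjacent addresses score high.
}

The point of the split (per the log above) is that the scoring state no longer lives in VLOperands, which should keep the follow-up functional change D124309 smaller.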
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2150ed99e79de..ea914a08d75a7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1034,96 +1034,18 @@ class BoUpSLP {
#endif
};
- /// A helper data structure to hold the operands of a vector of instructions.
- /// This supports a fixed vector length for all operand vectors.
- class VLOperands {
- /// For each operand we need (i) the value, and (ii) the opcode that it
- /// would be attached to if the expression was in a left-linearized form.
- /// This is required to avoid illegal operand reordering.
- /// For example:
- /// \verbatim
- /// 0 Op1
- /// |/
- /// Op1 Op2 Linearized + Op2
- /// \ / ----------> |/
- /// - -
- ///
- /// Op1 - Op2 (0 + Op1) - Op2
- /// \endverbatim
- ///
- /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
- ///
- /// Another way to think of this is to track all the operations across the
- /// path from the operand all the way to the root of the tree and to
- /// calculate the operation that corresponds to this path. For example, the
- /// path from Op2 to the root crosses the RHS of the '-', therefore the
- /// corresponding operation is a '-' (which matches the one in the
- /// linearized tree, as shown above).
- ///
- /// For lack of a better term, we refer to this operation as Accumulated
- /// Path Operation (APO).
- struct OperandData {
- OperandData() = default;
- OperandData(Value *V, bool APO, bool IsUsed)
- : V(V), APO(APO), IsUsed(IsUsed) {}
- /// The operand value.
- Value *V = nullptr;
- /// TreeEntries only allow a single opcode, or an alternate sequence of
- /// them (e.g, +, -). Therefore, we can safely use a boolean value for the
- /// APO. It is set to 'true' if 'V' is attached to an inverse operation
- /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
- /// (e.g., Add/Mul)
- bool APO = false;
- /// Helper data for the reordering function.
- bool IsUsed = false;
- };
-
- /// During operand reordering, we are trying to select the operand at lane
- /// that matches best with the operand at the neighboring lane. Our
- /// selection is based on the type of value we are looking for. For example,
- /// if the neighboring lane has a load, we need to look for a load that is
- /// accessing a consecutive address. These strategies are summarized in the
- /// 'ReorderingMode' enumerator.
- enum class ReorderingMode {
- Load, ///< Matching loads to consecutive memory addresses
- Opcode, ///< Matching instructions based on opcode (same or alternate)
- Constant, ///< Matching constants
- Splat, ///< Matching the same instruction multiple times (broadcast)
- Failed, ///< We failed to create a vectorizable group
- };
-
- using OperandDataVec = SmallVector<OperandData, 2>;
-
- /// A vector of operand vectors.
- SmallVector<OperandDataVec, 4> OpsVec;
-
+ /// A helper class used for scoring candidates for two consecutive lanes.
+ class LookAheadHeuristics {
const DataLayout &DL;
ScalarEvolution &SE;
const BoUpSLP &R;
+ int NumLanes; // Total number of lanes (aka vectorization factor).
+ int MaxLevel; // The maximum recursion depth for accumulating score.
- /// \returns the operand data at \p OpIdx and \p Lane.
- OperandData &getData(unsigned OpIdx, unsigned Lane) {
- return OpsVec[OpIdx][Lane];
- }
-
- /// \returns the operand data at \p OpIdx and \p Lane. Const version.
- const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
- return OpsVec[OpIdx][Lane];
- }
-
- /// Clears the used flag for all entries.
- void clearUsed() {
- for (unsigned OpIdx = 0, NumOperands = getNumOperands();
- OpIdx != NumOperands; ++OpIdx)
- for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
- ++Lane)
- OpsVec[OpIdx][Lane].IsUsed = false;
- }
-
- /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
- void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
- std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
- }
+ public:
+ LookAheadHeuristics(const DataLayout &DL, ScalarEvolution &SE,
+ const BoUpSLP &R, int NumLanes, int MaxLevel)
+ : DL(DL), SE(SE), R(R), NumLanes(NumLanes), MaxLevel(MaxLevel) {}
// The hard-coded scores listed here are not very important, though it shall
// be higher for better matches to improve the resulting cost. When
@@ -1169,8 +1091,7 @@ class BoUpSLP {
/// Also, checks if \p V1 and \p V2 are compatible with instructions in \p
/// MainAltOps.
int getShallowScore(Value *V1, Value *V2, Instruction *U1, Instruction *U2,
- const DataLayout &DL, ScalarEvolution &SE, int NumLanes,
- ArrayRef<Value *> MainAltOps) {
+ ArrayRef<Value *> MainAltOps) const {
if (V1 == V2) {
if (isa<LoadInst>(V1)) {
// Returns true if the users of V1 and V2 won't need to be extracted.
@@ -1192,37 +1113,37 @@ class BoUpSLP {
ElementCount::getFixed(NumLanes)) &&
((int)V1->getNumUses() == NumLanes ||
AllUsersAreInternal(V1, V2)))
- return VLOperands::ScoreSplatLoads;
+ return LookAheadHeuristics::ScoreSplatLoads;
}
- return VLOperands::ScoreSplat;
+ return LookAheadHeuristics::ScoreSplat;
}
auto *LI1 = dyn_cast<LoadInst>(V1);
auto *LI2 = dyn_cast<LoadInst>(V2);
if (LI1 && LI2) {
if (LI1->getParent() != LI2->getParent())
- return VLOperands::ScoreFail;
+ return LookAheadHeuristics::ScoreFail;
Optional<int> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
if (!Dist || *Dist == 0)
- return VLOperands::ScoreFail;
+ return LookAheadHeuristics::ScoreFail;
// The distance is too large - still may be profitable to use masked
// loads/gathers.
if (std::abs(*Dist) > NumLanes / 2)
- return VLOperands::ScoreAltOpcodes;
+ return LookAheadHeuristics::ScoreAltOpcodes;
// This still will detect consecutive loads, but we might have "holes"
// in some cases. It is ok for non-power-2 vectorization and may produce
// better results. It should not affect current vectorization.
- return (*Dist > 0) ? VLOperands::ScoreConsecutiveLoads
- : VLOperands::ScoreReversedLoads;
+ return (*Dist > 0) ? LookAheadHeuristics::ScoreConsecutiveLoads
+ : LookAheadHeuristics::ScoreReversedLoads;
}
auto *C1 = dyn_cast<Constant>(V1);
auto *C2 = dyn_cast<Constant>(V2);
if (C1 && C2)
- return VLOperands::ScoreConstants;
+ return LookAheadHeuristics::ScoreConstants;
// Extracts from consecutive indexes of the same vector better score as
// the extracts could be optimized away.
@@ -1231,7 +1152,7 @@ class BoUpSLP {
if (match(V1, m_ExtractElt(m_Value(EV1), m_ConstantInt(Ex1Idx)))) {
// Undefs are always profitable for extractelements.
if (isa<UndefValue>(V2))
- return VLOperands::ScoreConsecutiveExtracts;
+ return LookAheadHeuristics::ScoreConsecutiveExtracts;
Value *EV2 = nullptr;
ConstantInt *Ex2Idx = nullptr;
if (match(V2,
@@ -1239,9 +1160,9 @@ class BoUpSLP {
m_Undef())))) {
// Undefs are always profitable for extractelements.
if (!Ex2Idx)
- return VLOperands::ScoreConsecutiveExtracts;
+ return LookAheadHeuristics::ScoreConsecutiveExtracts;
if (isUndefVector(EV2) && EV2->getType() == EV1->getType())
- return VLOperands::ScoreConsecutiveExtracts;
+ return LookAheadHeuristics::ScoreConsecutiveExtracts;
if (EV2 == EV1) {
int Idx1 = Ex1Idx->getZExtValue();
int Idx2 = Ex2Idx->getZExtValue();
@@ -1249,22 +1170,22 @@ class BoUpSLP {
// The distance is too large - still may be profitable to use
// shuffles.
if (std::abs(Dist) == 0)
- return VLOperands::ScoreSplat;
+ return LookAheadHeuristics::ScoreSplat;
if (std::abs(Dist) > NumLanes / 2)
- return VLOperands::ScoreSameOpcode;
- return (Dist > 0) ? VLOperands::ScoreConsecutiveExtracts
- : VLOperands::ScoreReversedExtracts;
+ return LookAheadHeuristics::ScoreSameOpcode;
+ return (Dist > 0) ? LookAheadHeuristics::ScoreConsecutiveExtracts
+ : LookAheadHeuristics::ScoreReversedExtracts;
}
- return VLOperands::ScoreAltOpcodes;
+ return LookAheadHeuristics::ScoreAltOpcodes;
}
- return VLOperands::ScoreFail;
+ return LookAheadHeuristics::ScoreFail;
}
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
if (I1 && I2) {
if (I1->getParent() != I2->getParent())
- return VLOperands::ScoreFail;
+ return LookAheadHeuristics::ScoreFail;
SmallVector<Value *, 4> Ops(MainAltOps.begin(), MainAltOps.end());
Ops.push_back(I1);
Ops.push_back(I2);
@@ -1278,77 +1199,17 @@ class BoUpSLP {
return cast<Instruction>(V)->getNumOperands() ==
S.MainOp->getNumOperands();
}))
- return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes
- : VLOperands::ScoreSameOpcode;
+ return S.isAltShuffle() ? LookAheadHeuristics::ScoreAltOpcodes
+ : LookAheadHeuristics::ScoreSameOpcode;
}
if (isa<UndefValue>(V2))
- return VLOperands::ScoreUndef;
+ return LookAheadHeuristics::ScoreUndef;
- return VLOperands::ScoreFail;
+ return LookAheadHeuristics::ScoreFail;
}
- /// \param Lane lane of the operands under analysis.
- /// \param OpIdx operand index in \p Lane lane we're looking the best
- /// candidate for.
- /// \param Idx operand index of the current candidate value.
- /// \returns The additional score due to possible broadcasting of the
- /// elements in the lane. It is more profitable to have power-of-2 unique
- /// elements in the lane, it will be vectorized with higher probability
- /// after removing duplicates. Currently the SLP vectorizer supports only
- /// vectorization of the power-of-2 number of unique scalars.
- int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
- Value *IdxLaneV = getData(Idx, Lane).V;
- if (!isa<Instruction>(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V)
- return 0;
- SmallPtrSet<Value *, 4> Uniques;
- for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln) {
- if (Ln == Lane)
- continue;
- Value *OpIdxLnV = getData(OpIdx, Ln).V;
- if (!isa<Instruction>(OpIdxLnV))
- return 0;
- Uniques.insert(OpIdxLnV);
- }
- int UniquesCount = Uniques.size();
- int UniquesCntWithIdxLaneV =
- Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1;
- Value *OpIdxLaneV = getData(OpIdx, Lane).V;
- int UniquesCntWithOpIdxLaneV =
- Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1;
- if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV)
- return 0;
- return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) -
- UniquesCntWithOpIdxLaneV) -
- (PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV);
- }
-
- /// \param Lane lane of the operands under analysis.
- /// \param OpIdx operand index in \p Lane lane we're looking the best
- /// candidate for.
- /// \param Idx operand index of the current candidate value.
- /// \returns The additional score for the scalar which users are all
- /// vectorized.
- int getExternalUseScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
- Value *IdxLaneV = getData(Idx, Lane).V;
- Value *OpIdxLaneV = getData(OpIdx, Lane).V;
- // Do not care about number of uses for vector-like instructions
- // (extractelement/extractvalue with constant indices), they are extracts
- // themselves and already externally used. Vectorization of such
- // instructions does not add extra extractelement instruction, just may
- // remove it.
- if (isVectorLikeInstWithConstOps(IdxLaneV) &&
- isVectorLikeInstWithConstOps(OpIdxLaneV))
- return VLOperands::ScoreAllUserVectorized;
- auto *IdxLaneI = dyn_cast<Instruction>(IdxLaneV);
- if (!IdxLaneI || !isa<Instruction>(OpIdxLaneV))
- return 0;
- return R.areAllUsersVectorized(IdxLaneI, None)
- ? VLOperands::ScoreAllUserVectorized
- : 0;
- }
-
- /// Go through the operands of \p LHS and \p RHS recursively until \p
+ /// Go through the operands of \p LHS and \p RHS recursively until
/// MaxLevel, and return the cumulative score. \p U1 and \p U2 are
/// the users of \p LHS and \p RHS (that is \p LHS and \p RHS are operands
/// of \p U1 and \p U2), except at the beginning of the recursion where
@@ -1365,8 +1226,8 @@ class BoUpSLP {
/// each level recursively, accumulating the score. It starts from matching
/// the additions at level 0, then moves on to the loads (level 1). The
/// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and
- /// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while
- /// {A[0],C[0]} has a score of VLOperands::ScoreFail.
+ /// {B[0],B[1]} match with LookAheadHeuristics::ScoreConsecutiveLoads, while
+ /// {A[0],C[0]} has a score of LookAheadHeuristics::ScoreFail.
/// Please note that the order of the operands does not matter, as we
/// evaluate the score of all profitable combinations of operands. In
/// other words the score of G1 and G4 is the same as G1 and G2. This
@@ -1375,12 +1236,12 @@ class BoUpSLP {
/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
/// Luís F. W. Góes
int getScoreAtLevelRec(Value *LHS, Value *RHS, Instruction *U1,
- Instruction *U2, int CurrLevel, int MaxLevel,
- ArrayRef<Value *> MainAltOps) {
+ Instruction *U2, int CurrLevel,
+ ArrayRef<Value *> MainAltOps) const {
// Get the shallow score of V1 and V2.
int ShallowScoreAtThisLevel =
- getShallowScore(LHS, RHS, U1, U2, DL, SE, getNumLanes(), MainAltOps);
+ getShallowScore(LHS, RHS, U1, U2, MainAltOps);
// If reached MaxLevel,
// or if V1 and V2 are not instructions,
@@ -1391,7 +1252,7 @@ class BoUpSLP {
auto *I1 = dyn_cast<Instruction>(LHS);
auto *I2 = dyn_cast<Instruction>(RHS);
if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
- ShallowScoreAtThisLevel == VLOperands::ScoreFail ||
+ ShallowScoreAtThisLevel == LookAheadHeuristics::ScoreFail ||
(((isa<LoadInst>(I1) && isa<LoadInst>(I2)) ||
(I1->getNumOperands() > 2 && I2->getNumOperands() > 2) ||
(isa<ExtractElementInst>(I1) && isa<ExtractElementInst>(I2))) &&
@@ -1423,9 +1284,10 @@ class BoUpSLP {
// Recursively calculate the cost at each level
int TmpScore =
getScoreAtLevelRec(I1->getOperand(OpIdx1), I2->getOperand(OpIdx2),
- I1, I2, CurrLevel + 1, MaxLevel, None);
+ I1, I2, CurrLevel + 1, None);
// Look for the best score.
- if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) {
+ if (TmpScore > LookAheadHeuristics::ScoreFail &&
+ TmpScore > MaxTmpScore) {
MaxTmpScore = TmpScore;
MaxOpIdx2 = OpIdx2;
FoundBest = true;
@@ -1439,6 +1301,157 @@ class BoUpSLP {
}
return ShallowScoreAtThisLevel;
}
+ };
+ /// A helper data structure to hold the operands of a vector of instructions.
+ /// This supports a fixed vector length for all operand vectors.
+ class VLOperands {
+ /// For each operand we need (i) the value, and (ii) the opcode that it
+ /// would be attached to if the expression was in a left-linearized form.
+ /// This is required to avoid illegal operand reordering.
+ /// For example:
+ /// \verbatim
+ /// 0 Op1
+ /// |/
+ /// Op1 Op2 Linearized + Op2
+ /// \ / ----------> |/
+ /// - -
+ ///
+ /// Op1 - Op2 (0 + Op1) - Op2
+ /// \endverbatim
+ ///
+ /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
+ ///
+ /// Another way to think of this is to track all the operations across the
+ /// path from the operand all the way to the root of the tree and to
+ /// calculate the operation that corresponds to this path. For example, the
+ /// path from Op2 to the root crosses the RHS of the '-', therefore the
+ /// corresponding operation is a '-' (which matches the one in the
+ /// linearized tree, as shown above).
+ ///
+ /// For lack of a better term, we refer to this operation as Accumulated
+ /// Path Operation (APO).
+ struct OperandData {
+ OperandData() = default;
+ OperandData(Value *V, bool APO, bool IsUsed)
+ : V(V), APO(APO), IsUsed(IsUsed) {}
+ /// The operand value.
+ Value *V = nullptr;
+ /// TreeEntries only allow a single opcode, or an alternate sequence of
+ /// them (e.g, +, -). Therefore, we can safely use a boolean value for the
+ /// APO. It is set to 'true' if 'V' is attached to an inverse operation
+ /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
+ /// (e.g., Add/Mul)
+ bool APO = false;
+ /// Helper data for the reordering function.
+ bool IsUsed = false;
+ };
+
+ /// During operand reordering, we are trying to select the operand at lane
+ /// that matches best with the operand at the neighboring lane. Our
+ /// selection is based on the type of value we are looking for. For example,
+ /// if the neighboring lane has a load, we need to look for a load that is
+ /// accessing a consecutive address. These strategies are summarized in the
+ /// 'ReorderingMode' enumerator.
+ enum class ReorderingMode {
+ Load, ///< Matching loads to consecutive memory addresses
+ Opcode, ///< Matching instructions based on opcode (same or alternate)
+ Constant, ///< Matching constants
+ Splat, ///< Matching the same instruction multiple times (broadcast)
+ Failed, ///< We failed to create a vectorizable group
+ };
+
+ using OperandDataVec = SmallVector<OperandData, 2>;
+
+ /// A vector of operand vectors.
+ SmallVector<OperandDataVec, 4> OpsVec;
+
+ const DataLayout &DL;
+ ScalarEvolution &SE;
+ const BoUpSLP &R;
+
+ /// \returns the operand data at \p OpIdx and \p Lane.
+ OperandData &getData(unsigned OpIdx, unsigned Lane) {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// \returns the operand data at \p OpIdx and \p Lane. Const version.
+ const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// Clears the used flag for all entries.
+ void clearUsed() {
+ for (unsigned OpIdx = 0, NumOperands = getNumOperands();
+ OpIdx != NumOperands; ++OpIdx)
+ for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+ ++Lane)
+ OpsVec[OpIdx][Lane].IsUsed = false;
+ }
+
+ /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
+ void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
+ std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
+ }
+
+ /// \param Lane lane of the operands under analysis.
+ /// \param OpIdx operand index in \p Lane lane we're looking the best
+ /// candidate for.
+ /// \param Idx operand index of the current candidate value.
+ /// \returns The additional score due to possible broadcasting of the
+ /// elements in the lane. It is more profitable to have power-of-2 unique
+ /// elements in the lane, it will be vectorized with higher probability
+ /// after removing duplicates. Currently the SLP vectorizer supports only
+ /// vectorization of the power-of-2 number of unique scalars.
+ int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
+ Value *IdxLaneV = getData(Idx, Lane).V;
+ if (!isa<Instruction>(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V)
+ return 0;
+ SmallPtrSet<Value *, 4> Uniques;
+ for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln) {
+ if (Ln == Lane)
+ continue;
+ Value *OpIdxLnV = getData(OpIdx, Ln).V;
+ if (!isa<Instruction>(OpIdxLnV))
+ return 0;
+ Uniques.insert(OpIdxLnV);
+ }
+ int UniquesCount = Uniques.size();
+ int UniquesCntWithIdxLaneV =
+ Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1;
+ Value *OpIdxLaneV = getData(OpIdx, Lane).V;
+ int UniquesCntWithOpIdxLaneV =
+ Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1;
+ if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV)
+ return 0;
+ return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) -
+ UniquesCntWithOpIdxLaneV) -
+ (PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV);
+ }
+
+ /// \param Lane lane of the operands under analysis.
+ /// \param OpIdx operand index in \p Lane lane we're looking the best
+ /// candidate for.
+ /// \param Idx operand index of the current candidate value.
+ /// \returns The additional score for the scalar which users are all
+ /// vectorized.
+ int getExternalUseScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
+ Value *IdxLaneV = getData(Idx, Lane).V;
+ Value *OpIdxLaneV = getData(OpIdx, Lane).V;
+ // Do not care about number of uses for vector-like instructions
+ // (extractelement/extractvalue with constant indices), they are extracts
+ // themselves and already externally used. Vectorization of such
+ // instructions does not add extra extractelement instruction, just may
+ // remove it.
+ if (isVectorLikeInstWithConstOps(IdxLaneV) &&
+ isVectorLikeInstWithConstOps(OpIdxLaneV))
+ return LookAheadHeuristics::ScoreAllUserVectorized;
+ auto *IdxLaneI = dyn_cast<Instruction>(IdxLaneV);
+ if (!IdxLaneI || !isa<Instruction>(OpIdxLaneV))
+ return 0;
+ return R.areAllUsersVectorized(IdxLaneI, None)
+ ? LookAheadHeuristics::ScoreAllUserVectorized
+ : 0;
+ }
/// Score scaling factor for fully compatible instructions but with
/// different number of external uses. Allows better selection of the
@@ -1453,10 +1466,13 @@ class BoUpSLP {
int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef<Value *> MainAltOps,
int Lane, unsigned OpIdx, unsigned Idx,
bool &IsUsed) {
+ LookAheadHeuristics LookAhead(DL, SE, R, getNumLanes(),
+ LookAheadMaxDepth);
// Keep track of the instruction stack as we recurse into the operands
// during the look-ahead score exploration.
- int Score = getScoreAtLevelRec(LHS, RHS, /*U1=*/nullptr, /*U2=*/nullptr,
- 1, LookAheadMaxDepth, MainAltOps);
+ int Score =
+ LookAhead.getScoreAtLevelRec(LHS, RHS, /*U1=*/nullptr, /*U2=*/nullptr,
+ /*CurrLevel=*/1, MainAltOps);
if (Score) {
int SplatScore = getSplatScore(Lane, OpIdx, Idx);
if (Score <= -SplatScore) {