[llvm] LV: fix style after cursory reading (NFC) (PR #105830)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 08:13:25 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/105830
From be7152fe42ca8ccc9a33ae86353a348a84ce7bce Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 23 Aug 2024 14:37:20 +0100
Subject: [PATCH 1/2] LV: fix style after cursory reading (NFC)
---
.../Transforms/Vectorize/LoopVectorize.cpp | 305 +++++++++---------
1 file changed, 151 insertions(+), 154 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0200525a718d5f..edd18ab4d8c87f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -71,7 +71,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -104,7 +103,6 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -126,12 +124,9 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/VectorBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
@@ -148,12 +143,10 @@
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
-#include <cmath>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
-#include <map>
#include <memory>
#include <string>
#include <tuple>
@@ -361,7 +354,7 @@ cl::opt<bool> EnableVPlanNativePath(
"enable-vplan-native-path", cl::Hidden,
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));
-}
+} // namespace llvm
// This flag enables the stress testing of the VPlan H-CFG construction in the
// VPlan-native vectorization path. It must be used in conjuction with
@@ -1190,8 +1183,8 @@ class LoopVectorizationCostModel {
assert(VF.isVector() && "Expected VF >=2");
/// Broadcast this decicion to all instructions inside the group.
/// But the cost will be assigned to one instruction only.
- for (unsigned i = 0; i < Grp->getFactor(); ++i) {
- if (auto *I = Grp->getMember(i)) {
+ for (unsigned Idx = 0; Idx < Grp->getFactor(); ++Idx) {
+ if (auto *I = Grp->getMember(Idx)) {
if (Grp->getInsertPos() == I)
WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
else
@@ -1834,7 +1827,7 @@ class GeneratedRTChecks {
/// un-linked from the IR and is added back during vector code generation. If
/// there is no vector code generation, the check blocks are removed
/// completely.
- void Create(Loop *L, const LoopAccessInfo &LAI,
+ void create(Loop *L, const LoopAccessInfo &LAI,
const SCEVPredicate &UnionPred, ElementCount VF, unsigned IC) {
// Hard cutoff to limit compile-time increase in case a very large number of
@@ -2610,7 +2603,7 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
IRBuilder<> B(LoopVectorPreHeader->getTerminator());
// Fast-math-flags propagate from the original induction instruction.
- if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
+ if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(),
@@ -2791,7 +2784,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
IRBuilder<> B(MiddleBlock->getTerminator());
// Fast-math-flags propagate from the original induction instruction.
- if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
+ if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
Value *CountMinusOne = B.CreateSub(
@@ -2903,7 +2896,7 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
return ScalarCallCost;
}
-static Type *MaybeVectorizeType(Type *Elt, ElementCount VF) {
+static Type *maybeVectorizeType(Type *Elt, ElementCount VF) {
if (VF.isScalar() || (!Elt->isIntOrPtrTy() && !Elt->isFloatingPointTy()))
return Elt;
return VectorType::get(Elt, VF);
@@ -2914,7 +2907,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
ElementCount VF) const {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");
- Type *RetTy = MaybeVectorizeType(CI->getType(), VF);
+ Type *RetTy = maybeVectorizeType(CI->getType(), VF);
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();
@@ -2924,7 +2917,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
SmallVector<Type *> ParamTys;
std::transform(FTy->param_begin(), FTy->param_end(),
std::back_inserter(ParamTys),
- [&](Type *Ty) { return MaybeVectorizeType(Ty, VF); });
+ [&](Type *Ty) { return maybeVectorizeType(Ty, VF); });
IntrinsicCostAttributes CostAttrs(ID, RetTy, Arguments, ParamTys, FMF,
dyn_cast<IntrinsicInst>(CI));
@@ -3016,7 +3009,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
// Returns true if a given use occurs in the predicated block. Phi nodes use
// their operands in their corresponding predecessor blocks.
- auto isBlockOfUsePredicated = [&](Use &U) -> bool {
+ auto IsBlockOfUsePredicated = [&](Use &U) -> bool {
auto *I = cast<Instruction>(U.getUser());
BasicBlock *BB = I->getParent();
if (auto *Phi = dyn_cast<PHINode>(I))
@@ -3060,7 +3053,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
// It's legal to sink the instruction if all its uses occur in the
// predicated block. Otherwise, there's nothing to do yet, and we may
// need to reanalyze the instruction.
- if (!llvm::all_of(I->uses(), isBlockOfUsePredicated)) {
+ if (!llvm::all_of(I->uses(), IsBlockOfUsePredicated)) {
InstsToReanalyze.push_back(I);
continue;
}
@@ -3088,9 +3081,9 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan,
PHINode *NewPhi = cast<PHINode>(State.get(VPPhi, 0));
// Make sure the builder has a valid insert point.
Builder.SetInsertPoint(NewPhi);
- for (unsigned i = 0; i < VPPhi->getNumOperands(); ++i) {
- VPValue *Inc = VPPhi->getIncomingValue(i);
- VPBasicBlock *VPBB = VPPhi->getIncomingBlock(i);
+ for (unsigned Idx = 0; Idx < VPPhi->getNumOperands(); ++Idx) {
+ VPValue *Inc = VPPhi->getIncomingValue(Idx);
+ VPBasicBlock *VPBB = VPPhi->getIncomingBlock(Idx);
NewPhi->addIncoming(State.get(Inc, 0), State.CFG.VPBB2IRBB[VPBB]);
}
}
@@ -3124,7 +3117,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// The pointer operands of loads and stores will be scalar as long as the
// memory access is not a gather or scatter operation. The value operand of a
// store will remain scalar if the store is scalarized.
- auto isScalarUse = [&](Instruction *MemAccess, Value *Ptr) {
+ auto IsScalarUse = [&](Instruction *MemAccess, Value *Ptr) {
InstWidening WideningDecision = getWideningDecision(MemAccess, VF);
assert(WideningDecision != CM_Unknown &&
"Widening decision should be ready at this moment");
@@ -3138,7 +3131,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// A helper that returns true if the given value is a getelementptr
// instruction contained in the loop.
- auto isLoopVaryingGEP = [&](Value *V) {
+ auto IsLoopVaryingGEP = [&](Value *V) {
return isa<GetElementPtrInst>(V) && !TheLoop->isLoopInvariant(V);
};
@@ -3146,10 +3139,10 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// be a scalar use and the pointer is only used by memory accesses, we place
// the pointer in ScalarPtrs. Otherwise, the pointer is placed in
// PossibleNonScalarPtrs.
- auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) {
+ auto EvaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) {
// We only care about bitcast and getelementptr instructions contained in
// the loop.
- if (!isLoopVaryingGEP(Ptr))
+ if (!IsLoopVaryingGEP(Ptr))
return;
// If the pointer has already been identified as scalar (e.g., if it was
@@ -3161,7 +3154,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// If the use of the pointer will be a scalar use, and all users of the
// pointer are memory accesses, place the pointer in ScalarPtrs. Otherwise,
// place the pointer in PossibleNonScalarPtrs.
- if (isScalarUse(MemAccess, Ptr) && llvm::all_of(I->users(), [&](User *U) {
+ if (IsScalarUse(MemAccess, Ptr) && llvm::all_of(I->users(), [&](User *U) {
return isa<LoadInst>(U) || isa<StoreInst>(U);
}))
ScalarPtrs.insert(I);
@@ -3186,10 +3179,10 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
if (auto *Load = dyn_cast<LoadInst>(&I)) {
- evaluatePtrUse(Load, Load->getPointerOperand());
+ EvaluatePtrUse(Load, Load->getPointerOperand());
} else if (auto *Store = dyn_cast<StoreInst>(&I)) {
- evaluatePtrUse(Store, Store->getPointerOperand());
- evaluatePtrUse(Store, Store->getValueOperand());
+ EvaluatePtrUse(Store, Store->getPointerOperand());
+ EvaluatePtrUse(Store, Store->getValueOperand());
}
}
for (auto *I : ScalarPtrs)
@@ -3215,14 +3208,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
unsigned Idx = 0;
while (Idx != Worklist.size()) {
Instruction *Dst = Worklist[Idx++];
- if (!isLoopVaryingGEP(Dst->getOperand(0)))
+ if (!IsLoopVaryingGEP(Dst->getOperand(0)))
continue;
auto *Src = cast<Instruction>(Dst->getOperand(0));
if (llvm::all_of(Src->users(), [&](User *U) -> bool {
auto *J = cast<Instruction>(U);
return !TheLoop->contains(J) || Worklist.count(J) ||
((isa<LoadInst>(J) || isa<StoreInst>(J)) &&
- isScalarUse(J, Src));
+ IsScalarUse(J, Src));
})) {
Worklist.insert(Src);
LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Src << "\n");
@@ -3247,7 +3240,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
return Induction.second.getKind() ==
InductionDescriptor::IK_PtrInduction &&
(isa<LoadInst>(I) || isa<StoreInst>(I)) &&
- Indvar == getLoadStorePointerOperand(I) && isScalarUse(I, Indvar);
+ Indvar == getLoadStorePointerOperand(I) && IsScalarUse(I, Indvar);
};
// Determine if all users of the induction variable are scalar after
@@ -3466,21 +3459,20 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
// losslessly cast all values to a common type.
unsigned InterleaveFactor = Group->getFactor();
bool ScalarNI = DL.isNonIntegralPointerType(ScalarTy);
- for (unsigned i = 0; i < InterleaveFactor; i++) {
- Instruction *Member = Group->getMember(i);
+ for (unsigned Idx = 0; Idx < InterleaveFactor; Idx++) {
+ Instruction *Member = Group->getMember(Idx);
if (!Member)
continue;
auto *MemberTy = getLoadStoreType(Member);
bool MemberNI = DL.isNonIntegralPointerType(MemberTy);
// Don't coerce non-integral pointers to integers or vice versa.
- if (MemberNI != ScalarNI) {
+ if (MemberNI != ScalarNI)
// TODO: Consider adding special nullptr value case here
return false;
- } else if (MemberNI && ScalarNI &&
- ScalarTy->getPointerAddressSpace() !=
- MemberTy->getPointerAddressSpace()) {
+ if (MemberNI && ScalarNI &&
+ ScalarTy->getPointerAddressSpace() !=
+ MemberTy->getPointerAddressSpace())
return false;
- }
}
// Check if masking is required.
@@ -3561,7 +3553,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// Global values, params and instructions outside of current loop are out of
// scope.
- auto isOutOfScope = [&](Value *V) -> bool {
+ auto IsOutOfScope = [&](Value *V) -> bool {
Instruction *I = dyn_cast<Instruction>(V);
return (!I || !TheLoop->contains(I));
};
@@ -3573,8 +3565,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// that require predication must not be considered uniform after
// vectorization, because that would create an erroneous replicating region
// where only a single instance out of VF should be formed.
- auto addToWorklistIfAllowed = [&](Instruction *I) -> void {
- if (isOutOfScope(I)) {
+ auto AddToWorklistIfAllowed = [&](Instruction *I) -> void {
+ if (IsOutOfScope(I)) {
LLVM_DEBUG(dbgs() << "LV: Found not uniform due to scope: "
<< *I << "\n");
return;
@@ -3597,13 +3589,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
for (BasicBlock *E : Exiting) {
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
- addToWorklistIfAllowed(Cmp);
+ AddToWorklistIfAllowed(Cmp);
}
auto PrevVF = VF.divideCoefficientBy(2);
// Return true if all lanes perform the same memory operation, and we can
// thus chose to execute only one.
- auto isUniformMemOpUse = [&](Instruction *I) {
+ auto IsUniformMemOpUse = [&](Instruction *I) {
// If the value was already known to not be uniform for the previous
// (smaller VF), it cannot be uniform for the larger VF.
if (PrevVF.isVector()) {
@@ -3621,12 +3613,12 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
return TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand());
};
- auto isUniformDecision = [&](Instruction *I, ElementCount VF) {
+ auto IsUniformDecision = [&](Instruction *I, ElementCount VF) {
InstWidening WideningDecision = getWideningDecision(I, VF);
assert(WideningDecision != CM_Unknown &&
"Widening decision should be ready at this moment");
- if (isUniformMemOpUse(I))
+ if (IsUniformMemOpUse(I))
return true;
return (WideningDecision == CM_Widen ||
@@ -3637,11 +3629,11 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// Returns true if Ptr is the pointer operand of a memory access instruction
// I, I is known to not require scalarization, and the pointer is not also
// stored.
- auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool {
+ auto IsVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool {
if (isa<StoreInst>(I) && I->getOperand(0) == Ptr)
return false;
return getLoadStorePointerOperand(I) == Ptr &&
- (isUniformDecision(I, VF) || Legal->isInvariant(Ptr));
+ (IsUniformDecision(I, VF) || Legal->isInvariant(Ptr));
};
// Holds a list of values which are known to have at least one uniform use.
@@ -3663,7 +3655,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
if (TheLoop->hasLoopInvariantOperands(&I))
- addToWorklistIfAllowed(&I);
+ AddToWorklistIfAllowed(&I);
break;
default:
break;
@@ -3673,9 +3665,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// ExtractValue instructions must be uniform, because the operands are
// known to be loop-invariant.
if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
- assert(isOutOfScope(EVI->getAggregateOperand()) &&
+ assert(IsOutOfScope(EVI->getAggregateOperand()) &&
"Expected aggregate value to be loop invariant");
- addToWorklistIfAllowed(EVI);
+ AddToWorklistIfAllowed(EVI);
continue;
}
@@ -3684,10 +3676,10 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
if (!Ptr)
continue;
- if (isUniformMemOpUse(&I))
- addToWorklistIfAllowed(&I);
+ if (IsUniformMemOpUse(&I))
+ AddToWorklistIfAllowed(&I);
- if (isVectorizedMemAccessUse(&I, Ptr))
+ if (IsVectorizedMemAccessUse(&I, Ptr))
HasUniformUse.insert(Ptr);
}
@@ -3695,28 +3687,28 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// demanding) users. Since loops are assumed to be in LCSSA form, this
// disallows uses outside the loop as well.
for (auto *V : HasUniformUse) {
- if (isOutOfScope(V))
+ if (IsOutOfScope(V))
continue;
auto *I = cast<Instruction>(V);
auto UsersAreMemAccesses =
llvm::all_of(I->users(), [&](User *U) -> bool {
auto *UI = cast<Instruction>(U);
- return TheLoop->contains(UI) && isVectorizedMemAccessUse(UI, V);
+ return TheLoop->contains(UI) && IsVectorizedMemAccessUse(UI, V);
});
if (UsersAreMemAccesses)
- addToWorklistIfAllowed(I);
+ AddToWorklistIfAllowed(I);
}
// Expand Worklist in topological order: whenever a new instruction
// is added , its users should be already inside Worklist. It ensures
// a uniform instruction will only be used by uniform instructions.
- unsigned idx = 0;
- while (idx != Worklist.size()) {
- Instruction *I = Worklist[idx++];
+ unsigned Idx = 0;
+ while (Idx != Worklist.size()) {
+ Instruction *I = Worklist[Idx++];
for (auto *OV : I->operand_values()) {
// isOutOfScope operands cannot be uniform instructions.
- if (isOutOfScope(OV))
+ if (IsOutOfScope(OV))
continue;
// First order recurrence Phi's should typically be considered
// non-uniform.
@@ -3728,9 +3720,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
auto *OI = cast<Instruction>(OV);
if (llvm::all_of(OI->users(), [&](User *U) -> bool {
auto *J = cast<Instruction>(U);
- return Worklist.count(J) || isVectorizedMemAccessUse(J, OI);
+ return Worklist.count(J) || IsVectorizedMemAccessUse(J, OI);
}))
- addToWorklistIfAllowed(OI);
+ AddToWorklistIfAllowed(OI);
}
}
@@ -3750,7 +3742,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
auto UniformInd = llvm::all_of(Ind->users(), [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) ||
- isVectorizedMemAccessUse(I, Ind);
+ IsVectorizedMemAccessUse(I, Ind);
});
if (!UniformInd)
continue;
@@ -3761,14 +3753,14 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
return I == Ind || !TheLoop->contains(I) || Worklist.count(I) ||
- isVectorizedMemAccessUse(I, IndUpdate);
+ IsVectorizedMemAccessUse(I, IndUpdate);
});
if (!UniformIndUpdate)
continue;
// The induction variable and its update instruction will remain uniform.
- addToWorklistIfAllowed(Ind);
- addToWorklistIfAllowed(IndUpdate);
+ AddToWorklistIfAllowed(Ind);
+ AddToWorklistIfAllowed(IndUpdate);
}
Uniforms[VF].insert(Worklist.begin(), Worklist.end());
@@ -3918,8 +3910,8 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
if (UserVF.isScalable())
return FixedScalableVFPair(
ElementCount::getFixed(UserVF.getKnownMinValue()), UserVF);
- else
- return UserVF;
+
+ return UserVF;
}
assert(ElementCount::isKnownGT(UserVF, MaxSafeUserVF));
@@ -4706,11 +4698,10 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
if (hasPlanWithVF(ForcedEC))
return {ForcedEC, 0, 0};
- else {
- LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization forced factor is not "
- "viable.\n");
- return Result;
- }
+
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization forced factor is not "
+ "viable.\n");
+ return Result;
}
if (OrigLoop->getHeader()->getParent()->hasOptSize() ||
@@ -4900,8 +4891,8 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
RegisterUsage R = calculateRegisterUsage({VF})[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
- for (auto& pair : R.MaxLocalUsers) {
- pair.second = std::max(pair.second, 1U);
+ for (auto &Pair : R.MaxLocalUsers) {
+ Pair.second = std::max(Pair.second, 1U);
}
// We calculate the interleave count using the following formula.
@@ -4917,11 +4908,12 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
// this currently happens when OptForSize, in which case IC is set to 1 above.
unsigned IC = UINT_MAX;
- for (auto& pair : R.MaxLocalUsers) {
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
+ for (const auto &Pair : R.MaxLocalUsers) {
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(Pair.first);
LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
<< " registers of "
- << TTI.getRegisterClassName(pair.first) << " register class\n");
+ << TTI.getRegisterClassName(Pair.first)
+ << " register class\n");
if (VF.isScalar()) {
if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
TargetNumRegisters = ForceTargetNumScalarRegs;
@@ -4929,10 +4921,10 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
TargetNumRegisters = ForceTargetNumVectorRegs;
}
- unsigned MaxLocalUsers = pair.second;
+ unsigned MaxLocalUsers = Pair.second;
unsigned LoopInvariantRegs = 0;
- if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
- LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
+ if (R.LoopInvariantRegs.find(Pair.first) != R.LoopInvariantRegs.end())
+ LoopInvariantRegs = R.LoopInvariantRegs[Pair.first];
unsigned TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs) /
MaxLocalUsers);
@@ -5117,10 +5109,10 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
// Interleave no less than SmallIC but not as aggressive as the normal IC
// to satisfy the rare situation when resources are too limited.
return std::max(IC / 2, SmallIC);
- } else {
- LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
- return SmallIC;
}
+
+ LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
+ return SmallIC;
}
// Interleave if this is a large loop (small loops are already dealt with by
@@ -5223,11 +5215,11 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
};
- for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
- Instruction *I = IdxToInstr[i];
+ for (unsigned int Idx = 0, Sz = IdxToInstr.size(); Idx < Sz; ++Idx) {
+ Instruction *I = IdxToInstr[Idx];
// Remove all of the instructions that end at this location.
- InstrList &List = TransposeEnds[i];
+ InstrList &List = TransposeEnds[Idx];
for (Instruction *ToRemove : List)
OpenIntervals.erase(ToRemove);
@@ -5242,7 +5234,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
collectInLoopReductions();
// For each VF find the maximum usage of registers.
- for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
+ for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
// Count the number of registers used, per register class, given all open
// intervals.
// Note that elements in this SmallMapVector will be default constructed
@@ -5250,7 +5242,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
// there is no previous entry for ClassID.
SmallMapVector<unsigned, unsigned, 4> RegUsage;
- if (VFs[j].isScalar()) {
+ if (VFs[J].isScalar()) {
for (auto *Inst : OpenIntervals) {
unsigned ClassID =
TTI.getRegisterClassForType(false, Inst->getType());
@@ -5259,12 +5251,12 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
RegUsage[ClassID] += 1;
}
} else {
- collectUniformsAndScalars(VFs[j]);
+ collectUniformsAndScalars(VFs[J]);
for (auto *Inst : OpenIntervals) {
// Skip ignored values for VF > 1.
if (VecValuesToIgnore.count(Inst))
continue;
- if (isScalarAfterVectorization(Inst, VFs[j])) {
+ if (isScalarAfterVectorization(Inst, VFs[J])) {
unsigned ClassID =
TTI.getRegisterClassForType(false, Inst->getType());
// FIXME: The target might use more than one register for the type
@@ -5273,25 +5265,25 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
} else {
unsigned ClassID =
TTI.getRegisterClassForType(true, Inst->getType());
- RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]);
+ RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[J]);
}
}
}
- for (auto& pair : RegUsage) {
- auto &Entry = MaxUsages[j][pair.first];
- Entry = std::max(Entry, pair.second);
+ for (const auto &Pair : RegUsage) {
+ auto &Entry = MaxUsages[J][Pair.first];
+ Entry = std::max(Entry, Pair.second);
}
}
- LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
+ LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
<< OpenIntervals.size() << '\n');
// Add the current instruction to the list of open intervals.
OpenIntervals.insert(I);
}
- for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
+ for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
// Note that elements in this SmallMapVector will be default constructed
// as 0. So we can use "Invariant[ClassID] += n" in the code below even if
// there is no previous entry for ClassID.
@@ -5303,20 +5295,20 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
bool IsScalar = all_of(Inst->users(), [&](User *U) {
auto *I = cast<Instruction>(U);
return TheLoop != LI->getLoopFor(I->getParent()) ||
- isScalarAfterVectorization(I, VFs[i]);
+ isScalarAfterVectorization(I, VFs[Idx]);
});
- ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[i];
+ ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
unsigned ClassID =
TTI.getRegisterClassForType(VF.isVector(), Inst->getType());
Invariant[ClassID] += GetRegUsage(Inst->getType(), VF);
}
LLVM_DEBUG({
- dbgs() << "LV(REG): VF = " << VFs[i] << '\n';
- dbgs() << "LV(REG): Found max usage: " << MaxUsages[i].size()
+ dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
+ dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
<< " item\n";
- for (const auto &pair : MaxUsages[i]) {
+ for (const auto &pair : MaxUsages[Idx]) {
dbgs() << "LV(REG): RegisterClass: "
<< TTI.getRegisterClassName(pair.first) << ", " << pair.second
<< " registers\n";
@@ -5331,8 +5323,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
});
RU.LoopInvariantRegs = Invariant;
- RU.MaxLocalUsers = MaxUsages[i];
- RUs[i] = RU;
+ RU.MaxLocalUsers = MaxUsages[Idx];
+ RUs[Idx] = RU;
}
return RUs;
@@ -5423,7 +5415,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
SmallVector<Instruction *, 8> Worklist;
// Returns true if the given instruction can be scalarized.
- auto canBeScalarized = [&](Instruction *I) -> bool {
+ auto CanBeScalarized = [&](Instruction *I) -> bool {
// We only attempt to scalarize instructions forming a single-use chain
// from the original predicated block that would otherwise be vectorized.
// Although not strictly necessary, we give up on instructions we know will
@@ -5499,7 +5491,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
if (auto *J = dyn_cast<Instruction>(U.get())) {
assert(VectorType::isValidElementType(J->getType()) &&
"Instruction has non-scalar type");
- if (canBeScalarized(J))
+ if (CanBeScalarized(J))
Worklist.push_back(J);
else if (needsExtract(J, VF)) {
ScalarCost += TTI.getScalarizationOverhead(
@@ -5579,10 +5571,10 @@ static const SCEV *getAddressAccessSCEV(
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
- auto SE = PSE.getSE();
+ auto *SE = PSE.getSE();
unsigned NumOperands = Gep->getNumOperands();
- for (unsigned i = 1; i < NumOperands; ++i) {
- Value *Opd = Gep->getOperand(i);
+ for (unsigned Idx = 1; Idx < NumOperands; ++Idx) {
+ Value *Opd = Gep->getOperand(Idx);
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
!Legal->isInductionVariable(Opd))
return nullptr;
@@ -5601,7 +5593,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
return InstructionCost::getInvalid();
Type *ValTy = getLoadStoreType(I);
- auto SE = PSE.getSE();
+ auto *SE = PSE.getSE();
unsigned AS = getLoadStoreAddressSpace(I);
Value *Ptr = getLoadStorePointerOperand(I);
@@ -5636,10 +5628,10 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
Cost /= getReciprocalPredBlockProb();
// Add the cost of an i1 extract and a branch
- auto *Vec_i1Ty =
+ auto *Veci1Ty =
VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
Cost += TTI.getScalarizationOverhead(
- Vec_i1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
+ Veci1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
/*Insert=*/false, /*Extract=*/true, CostKind);
Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);
@@ -5700,11 +5692,11 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
}
StoreInst *SI = cast<StoreInst>(I);
- bool isLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
+ bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +
- (isLoopInvariantStoreValue
+ (IsLoopInvariantStoreValue
? 0
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
CostKind, VF.getKnownMinValue() - 1));
@@ -5732,7 +5724,7 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- auto Group = getInterleavedAccessGroup(I);
+ const auto *Group = getInterleavedAccessGroup(I);
assert(Group && "Fail to get an interleaved access group.");
unsigned InterleaveFactor = Group->getFactor();
@@ -5999,7 +5991,7 @@ InstructionCost LoopVectorizationCostModel::getScalarizationOverhead(
// any overhead.
SmallVector<Type *> Tys;
for (auto *V : filterExtractingOperands(Ops, VF))
- Tys.push_back(MaybeVectorizeType(V->getType(), VF));
+ Tys.push_back(maybeVectorizeType(V->getType(), VF));
return Cost + TTI.getOperandsScalarizationOverhead(
filterExtractingOperands(Ops, VF), Tys, CostKind);
}
@@ -6023,7 +6015,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
NumPredStores++;
if (Legal->isUniformMemOp(I, VF)) {
- auto isLegalToScalarize = [&]() {
+ auto IsLegalToScalarize = [&]() {
if (!VF.isScalable())
// Scalarization of fixed length vectors "just works".
return true;
@@ -6054,8 +6046,9 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
// Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract
// FIXME: This cost is a significant under-estimate for tail folded
// memory ops.
- const InstructionCost ScalarizationCost = isLegalToScalarize() ?
- getUniformMemOpCost(&I, VF) : InstructionCost::getInvalid();
+ const InstructionCost ScalarizationCost =
+ IsLegalToScalarize() ? getUniformMemOpCost(&I, VF)
+ : InstructionCost::getInvalid();
// Choose better solution for the current VF, Note that Invalid
// costs compare as maximumal large. If both are invalid, we get
@@ -6084,7 +6077,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
InstructionCost InterleaveCost = InstructionCost::getInvalid();
unsigned NumAccesses = 1;
if (isAccessInterleaved(&I)) {
- auto Group = getInterleavedAccessGroup(&I);
+ const auto *Group = getInterleavedAccessGroup(&I);
assert(Group && "Fail to get an interleaved access group.");
// Make one decision for the whole group.
@@ -6122,7 +6115,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
// If the instructions belongs to an interleave group, the whole group
// receives the same decision. The whole group receives the cost, but
// the cost will actually be assigned to one instruction.
- if (auto Group = getInterleavedAccessGroup(&I))
+ if (const auto *Group = getInterleavedAccessGroup(&I))
setWideningDecision(Group, VF, Decision, Cost);
else
setWideningDecision(&I, VF, Decision, Cost);
@@ -6173,7 +6166,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
I, VF, CM_Scalarize,
(VF.getKnownMinValue() *
getMemoryInstructionCost(I, ElementCount::getFixed(1))));
- else if (auto Group = getInterleavedAccessGroup(I)) {
+ else if (const auto *Group = getInterleavedAccessGroup(I)) {
// Scalarize an interleave group of address loads.
for (unsigned I = 0; I < Group->getFactor(); ++I) {
if (Instruction *Member = Group->getMember(I))
@@ -6385,10 +6378,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *RetTy = I->getType();
if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
- auto SE = PSE.getSE();
+ auto *SE = PSE.getSE();
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- auto hasSingleCopyAfterVectorization = [this](Instruction *I,
+ auto HasSingleCopyAfterVectorization = [this](Instruction *I,
ElementCount VF) -> bool {
if (VF.isScalar())
return true;
@@ -6402,7 +6395,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return !Scalarized->second.count(UI);
});
};
- (void) hasSingleCopyAfterVectorization;
+ (void)HasSingleCopyAfterVectorization;
Type *VectorTy;
if (isScalarAfterVectorization(I, VF)) {
@@ -6415,7 +6408,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
I->getOpcode() == Instruction::PHI ||
(I->getOpcode() == Instruction::BitCast &&
I->getType()->isPointerTy()) ||
- hasSingleCopyAfterVectorization(I, VF));
+ HasSingleCopyAfterVectorization(I, VF));
VectorTy = RetTy;
} else
VectorTy = ToVectorTy(RetTy, VF);
@@ -6452,19 +6445,21 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
if (VF.isScalable())
return InstructionCost::getInvalid();
// Return cost for branches around scalarized and predicated blocks.
- auto *Vec_i1Ty =
+ auto *Veci1Ty =
VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
return (
TTI.getScalarizationOverhead(
- Vec_i1Ty, APInt::getAllOnes(VF.getFixedValue()),
+ Veci1Ty, APInt::getAllOnes(VF.getFixedValue()),
/*Insert*/ false, /*Extract*/ true, CostKind) +
(TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.getFixedValue()));
- } else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
+ }
+
+ if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
// The back-edge branch will remain, as will all scalar branches.
return TTI.getCFInstrCost(Instruction::Br, CostKind);
- else
- // This branch will be eliminated by if-conversion.
- return 0;
+
+ // This branch will be eliminated by if-conversion.
+ return 0;
// Note: We currently assume zero cost for an unconditional branch inside
// a predicated block since it will become a fall-through, although we
// may decide in the future to call TTI for all branches.
@@ -7057,9 +7052,9 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
buildVPlansWithVPRecipes(UserVF, UserVF);
LLVM_DEBUG(printPlans(dbgs()));
return;
- } else
- reportVectorizationInfo("UserVF ignored because of invalid costs.",
- "InvalidCost", ORE, OrigLoop);
+ }
+ reportVectorizationInfo("UserVF ignored because of invalid costs.",
+ "InvalidCost", ORE, OrigLoop);
}
}
@@ -7397,7 +7392,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
return BestFactor;
}
-static void AddRuntimeUnrollDisableMetaData(Loop *L) {
+static void addRuntimeUnrollDisableMetaData(Loop *L) {
SmallVector<Metadata *, 4> MDs;
// Reserve first location for self reference to the LoopID metadata node.
MDs.push_back(nullptr);
@@ -7405,14 +7400,14 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
MDNode *LoopID = L->getLoopID();
if (LoopID) {
// First find existing loop unrolling disable metadata.
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- auto *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
+ auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
if (MD) {
const auto *S = dyn_cast<MDString>(MD->getOperand(0));
IsUnrollMetadata =
S && S->getString().starts_with("llvm.loop.unroll.disable");
}
- MDs.push_back(LoopID->getOperand(i));
+ MDs.push_back(LoopID->getOperand(I));
}
}
@@ -7618,7 +7613,7 @@ LoopVectorizationPlanner::executePlan(
TargetTransformInfo::UnrollingPreferences UP;
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
- AddRuntimeUnrollDisableMetaData(L);
+ addRuntimeUnrollDisableMetaData(L);
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
@@ -8120,7 +8115,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
"Must be called with either a load or store");
- auto willWiden = [&](ElementCount VF) -> bool {
+ auto WillWiden = [&](ElementCount VF) -> bool {
LoopVectorizationCostModel::InstWidening Decision =
CM.getWideningDecision(I, VF);
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
@@ -8133,7 +8128,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
return Decision != LoopVectorizationCostModel::CM_Scalarize;
};
- if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
+ if (!LoopVectorizationPlanner::getDecisionAndClampRange(WillWiden, Range))
return nullptr;
VPValue *Mask = nullptr;
@@ -8220,7 +8215,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
// Determine whether \p K is a truncation based on an induction variable that
// can be optimized.
- auto isOptimizableIVTruncate =
+ auto IsOptimizableIVTruncate =
[&](Instruction *K) -> std::function<bool(ElementCount)> {
return [=](ElementCount VF) -> bool {
return CM.isOptimizableIVTruncate(K, VF);
@@ -8228,7 +8223,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
};
if (LoopVectorizationPlanner::getDecisionAndClampRange(
- isOptimizableIVTruncate(I), Range)) {
+ IsOptimizableIVTruncate(I), Range)) {
auto *Phi = cast<PHINode>(I->getOperand(0));
const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
@@ -8510,7 +8505,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
- if (auto Phi = dyn_cast<PHINode>(Instr)) {
+ if (auto *Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Operands);
@@ -8560,7 +8555,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (!shouldWiden(Instr, Range))
return nullptr;
- if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Instr))
return new VPWidenGEPRecipe(GEP,
make_range(Operands.begin(), Operands.end()));
@@ -8634,7 +8629,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
- auto MiddleVPBB =
+ auto *MiddleVPBB =
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
// No edge from the middle block to the unique exit block has been inserted
// and there is nothing to fix from vector loop; phis should have incoming
@@ -8675,7 +8670,7 @@ addUsersInExitBlock(VPlan &Plan,
if (ExitingValuesToFix.empty())
return;
- auto MiddleVPBB =
+ auto *MiddleVPBB =
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
BasicBlock *ExitBB =
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
@@ -8915,7 +8910,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// placeholders for its members' Recipes which we'll be replacing with a
// single VPInterleaveRecipe.
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
- auto applyIG = [IG, this](ElementCount VF) -> bool {
+ auto ApplyIG = [IG, this](ElementCount VF) -> bool {
bool Result = (VF.isVector() && // Query is illegal for VF == 1
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
@@ -8926,7 +8921,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
"Unsupported interleave factor for scalable vectors");
return Result;
};
- if (!getDecisionAndClampRange(applyIG, Range))
+ if (!getDecisionAndClampRange(ApplyIG, Range))
continue;
InterleaveGroups.insert(IG);
};
@@ -9973,7 +9968,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Optimistically generate runtime checks if they are needed. Drop them if
// they turn out to not be profitable.
if (VF.Width.isVector() || SelectedIC > 1)
- Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+ Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
// Check if it is profitable to vectorize with runtime checks.
bool ForceVectorization =
@@ -10055,7 +10050,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
<< IntDiagMsg.second;
});
return false;
- } else if (!VectorizeLoop && InterleaveLoop) {
+ }
+
+ if (!VectorizeLoop && InterleaveLoop) {
LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
ORE->emit([&]() {
return OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first,
@@ -10230,7 +10227,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
L->setLoopID(*RemainderLoopID);
} else {
if (DisableRuntimeUnroll)
- AddRuntimeUnrollDisableMetaData(L);
+ addRuntimeUnrollDisableMetaData(L);
// Mark the loop as already vectorized to avoid vectorizing again.
Hints.setAlreadyVectorized();
From 0aa4f704e0e85d8bcb1e71a826dfa5926d014080 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 6 Sep 2024 16:11:21 +0100
Subject: [PATCH 2/2] LV: fix nit VecI1Ty
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index edd18ab4d8c87f..2a4932a944c87c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5628,10 +5628,10 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
Cost /= getReciprocalPredBlockProb();
// Add the cost of an i1 extract and a branch
- auto *Veci1Ty =
+ auto *VecI1Ty =
VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
Cost += TTI.getScalarizationOverhead(
- Veci1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
+ VecI1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
/*Insert=*/false, /*Extract=*/true, CostKind);
Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);
@@ -6445,11 +6445,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
if (VF.isScalable())
return InstructionCost::getInvalid();
// Return cost for branches around scalarized and predicated blocks.
- auto *Veci1Ty =
+ auto *VecI1Ty =
VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
return (
TTI.getScalarizationOverhead(
- Veci1Ty, APInt::getAllOnes(VF.getFixedValue()),
+ VecI1Ty, APInt::getAllOnes(VF.getFixedValue()),
/*Insert*/ false, /*Extract*/ true, CostKind) +
(TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.getFixedValue()));
}