[llvm] f20b507 - [llvm] Use llvm::bit_floor instead of llvm::PowerOf2Floor (NFC)
Kazu Hirata via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 28 09:06:38 PST 2023
Author: Kazu Hirata
Date: 2023-01-28T09:06:31-08:00
New Revision: f20b5071f3ab69bef389f550d5b93129e135d9f9
URL: https://github.com/llvm/llvm-project/commit/f20b5071f3ab69bef389f550d5b93129e135d9f9
DIFF: https://github.com/llvm/llvm-project/commit/f20b5071f3ab69bef389f550d5b93129e135d9f9.diff
LOG: [llvm] Use llvm::bit_floor instead of llvm::PowerOf2Floor (NFC)
Added:
Modified:
clang/lib/CodeGen/CGExprCXX.cpp
lld/ELF/SyntheticSections.cpp
lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
llvm/lib/MC/MCAsmStreamer.cpp
llvm/lib/MC/MCParser/AsmParser.cpp
llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
llvm/lib/MCA/InstrBuilder.cpp
llvm/lib/Support/FoldingSet.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
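For context, llvm::bit_floor returns the largest power of two that is less than or equal to its argument, and returns 0 for an argument of 0, which is exactly what llvm::PowerOf2Floor computed; hence the NFC tag. Below is a minimal standalone sketch of that equivalence. It is not part of the commit: it uses C++20's std::bit_floor (which llvm::bit_floor mirrors) instead of the LLVM headers, and powerOf2Floor is a hand-rolled stand-in for the old helper.

#include <bit>
#include <cassert>
#include <cstdint>
#include <initializer_list>

// Hand-rolled stand-in for the old llvm::PowerOf2Floor(uint64_t):
// largest power of two <= A, or 0 when A == 0.
constexpr uint64_t powerOf2Floor(uint64_t A) {
  return A ? (uint64_t(1) << (63 - std::countl_zero(A))) : 0;
}

int main() {
  // Both helpers agree on every input, including 0 and the maximum value.
  for (uint64_t V : {0ull, 1ull, 2ull, 3ull, 48ull, 1000ull, ~0ull})
    assert(powerOf2Floor(V) == std::bit_floor(V));
  return 0;
}

The explicit template arguments at a few call sites in the diff (llvm::bit_floor<uint64_t>(...), llvm::bit_floor<uint32_t>(...)) are there because bit_floor, unlike PowerOf2Floor, takes its argument type as a template parameter restricted to unsigned integral types, so arguments that are signed or of class type (for example TypeSize) need the type spelled out.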
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index b889a4e05ee1..90beb957be49 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -1654,7 +1654,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
CharUnits allocationAlign = allocAlign;
if (!E->passAlignment() &&
allocator->isReplaceableGlobalAllocationFunction()) {
- unsigned AllocatorAlign = llvm::PowerOf2Floor(std::min<uint64_t>(
+ unsigned AllocatorAlign = llvm::bit_floor(std::min<uint64_t>(
Target.getNewAlign(), getContext().getTypeSize(allocType)));
allocationAlign = std::max(
allocationAlign, getContext().toCharUnitsFromBits(AllocatorAlign));
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index e7c437bc7653..8fe134006ecb 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2777,7 +2777,7 @@ createSymbols(
// speed it up.
constexpr size_t numShards = 32;
const size_t concurrency =
- PowerOf2Floor(std::min<size_t>(config->threadCount, numShards));
+ llvm::bit_floor(std::min<size_t>(config->threadCount, numShards));
// A sharded map to uniquify symbols by name.
auto map =
@@ -3265,7 +3265,7 @@ void MergeNoTailSection::finalizeContents() {
// Concurrency level. Must be a power of 2 to avoid expensive modulo
// operations in the following tight loop.
const size_t concurrency =
- PowerOf2Floor(std::min<size_t>(config->threadCount, numShards));
+ llvm::bit_floor(std::min<size_t>(config->threadCount, numShards));
// Add section pieces to the builders.
parallelFor(0, concurrency, [&](size_t threadId) {
diff --git a/lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp b/lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp
index f64f29f81f0a..a1bf8405902c 100644
--- a/lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp
+++ b/lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp
@@ -250,7 +250,7 @@ IntelPTSingleBufferTrace::Start(const TraceIntelPTStartRequest &request,
request.ipt_trace_size);
}
uint64_t page_size = getpagesize();
- uint64_t aux_buffer_numpages = static_cast<uint64_t>(llvm::PowerOf2Floor(
+ uint64_t aux_buffer_numpages = static_cast<uint64_t>(llvm::bit_floor(
(request.ipt_trace_size + page_size - 1) / page_size));
Expected<perf_event_attr> attr = CreateIntelPTPerfEventConfiguration(
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8a1fce2d3d65..5c651371bd3c 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3004,7 +3004,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (!isPowerOf2_32(MemSizeInBits)) {
// This load needs splitting into power of 2 sized loads.
- LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ LargeSplitSize = llvm::bit_floor(MemSizeInBits);
SmallSplitSize = MemSizeInBits - LargeSplitSize;
} else {
// This is already a power of 2, but we still need to split this in half.
@@ -3122,7 +3122,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
uint64_t LargeSplitSize, SmallSplitSize;
if (!isPowerOf2_32(MemSizeInBits)) {
- LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
+ LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
} else {
auto &Ctx = MF.getFunction().getContext();
@@ -7638,7 +7638,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
// SDAGisms map cleanly to GISel concepts.
if (NewTy.isVector())
NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
unsigned NewTySize = NewTy.getSizeInBytes();
assert(NewTySize > 0 && "Could not find appropriate type");
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 7c6eac8c8ce0..bdcb6432c48e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -305,7 +305,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
const auto &DL = MF->getFunction().getParent()->getDataLayout();
bool AnyMerged = false;
do {
- unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
+ unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
// Compute the biggest store we can generate to handle the number of stores.
unsigned MergeSizeBits;
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 282bdb95acac..006f697b6187 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -1277,7 +1277,7 @@ void MCAsmStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
unsigned Remaining = Size - Emitted;
// The size of our partial emission must be a power of two less than
// Size.
- unsigned EmissionSize = PowerOf2Floor(std::min(Remaining, Size - 1));
+ unsigned EmissionSize = llvm::bit_floor(std::min(Remaining, Size - 1));
// Calculate the byte offset of our partial emission taking into account
// the endianness of the target.
unsigned ByteOffset =
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index bd4da1b282dd..c49e51332b12 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3451,7 +3451,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
Alignment = 1;
else if (!isPowerOf2_64(Alignment)) {
ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
- Alignment = PowerOf2Floor(Alignment);
+ Alignment = llvm::bit_floor<uint64_t>(Alignment);
}
if (!isUInt<32>(Alignment)) {
ReturnVal |= Error(AlignmentLoc, "alignment must be smaller than 2**32");
diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index a2e6a9e0e0f1..393548dd5bd3 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -320,7 +320,7 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
continue;
}
- uint64_t ResourceMask = PowerOf2Floor(ReadyMask);
+ uint64_t ResourceMask = llvm::bit_floor(ReadyMask);
auto it = AvailableUnits.find(ResourceMask);
if (it == AvailableUnits.end()) {
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 24729bd4f034..7f3a4bb11b27 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -123,7 +123,7 @@ static void initializeUsedResources(InstrDesc &ID,
ResourcePlusCycles &A = Worklist[I];
if (!A.second.size()) {
assert(llvm::popcount(A.first) > 1 && "Expected a group!");
- UsedResourceGroups |= PowerOf2Floor(A.first);
+ UsedResourceGroups |= llvm::bit_floor(A.first);
continue;
}
@@ -134,7 +134,7 @@ static void initializeUsedResources(InstrDesc &ID,
UsedResourceUnits |= A.first;
} else {
// Remove the leading 1 from the resource group mask.
- NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+ NormalizedMask ^= llvm::bit_floor(NormalizedMask);
if (UnitsFromResourceGroups & NormalizedMask)
ID.HasPartiallyOverlappingGroups = true;
@@ -172,7 +172,7 @@ static void initializeUsedResources(InstrDesc &ID,
for (ResourcePlusCycles &RPC : ID.Resources) {
if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
// Remove the leading 1 from the resource group mask.
- uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
+ uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
uint64_t MaxResourceUnits = llvm::popcount(Mask);
if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
RPC.second.setReserved();
diff --git a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp
index ece31b971c1c..419bf6740768 100644
--- a/llvm/lib/Support/FoldingSet.cpp
+++ b/llvm/lib/Support/FoldingSet.cpp
@@ -269,7 +269,7 @@ void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) {
// range of 1.0 - 2.0.
if(EltCount < capacity())
return;
- GrowBucketCount(PowerOf2Floor(EltCount), Info);
+ GrowBucketCount(llvm::bit_floor(EltCount), Info);
}
/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 992200b076d0..20fca4638118 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1284,7 +1284,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// We're probably decomposing an odd sized store. Try to split
// to the widest type. TODO: Account for alignment. As-is it
// should be OK, since the new parts will be further legalized.
- unsigned FloorSize = PowerOf2Floor(DstSize);
+ unsigned FloorSize = llvm::bit_floor(DstSize);
return std::pair(
0, LLT::scalarOrVector(
ElementCount::getFixed(FloorSize / EltSize), EltTy));
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 77630cf027fa..8031fe511cfd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7351,7 +7351,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
"Unexpected register residue for by-value argument.");
SDValue ResidueVal;
for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
- const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
+ const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
const MVT VT =
N == 1 ? MVT::i8
: ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index cc881406666c..e376f2e7a6da 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -159,11 +159,11 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
RVVBitsMax = std::max(RVVBitsMin, RVVBitsMax);
}
- RVVBitsMin =
- PowerOf2Floor((RVVBitsMin < 64 || RVVBitsMin > 65536) ? 0 : RVVBitsMin);
+ RVVBitsMin = llvm::bit_floor(
+ (RVVBitsMin < 64 || RVVBitsMin > 65536) ? 0 : RVVBitsMin);
}
RVVBitsMax =
- PowerOf2Floor((RVVBitsMax < 64 || RVVBitsMax > 65536) ? 0 : RVVBitsMax);
+ llvm::bit_floor((RVVBitsMax < 64 || RVVBitsMax > 65536) ? 0 : RVVBitsMax);
SmallString<512> Key;
Key += "RVVMin";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b6d90a28da9b..6a41f24151e5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19508,7 +19508,7 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(SubvecElts != NumElts && "Identity shuffle?");
// Clip to a power 2.
- SubvecElts = PowerOf2Floor(SubvecElts);
+ SubvecElts = llvm::bit_floor<uint32_t>(SubvecElts);
// Make sure the number of zeroable bits in the top at least covers the bits
// not covered by the subvector.
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 001ef55ba472..d68069b535dd 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5122,7 +5122,7 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
// gcd(Offset, Alignment) is an alignment.
uint32_t gcd = std::gcd(uint32_t(abs((int32_t)Offset)), Alignment);
- Alignment = llvm::PowerOf2Floor(gcd);
+ Alignment = llvm::bit_floor(gcd);
}
}
@@ -5258,7 +5258,7 @@ struct AAAlignFloating : AAAlignImpl {
uint32_t gcd =
std::gcd(uint32_t(abs((int32_t)Offset)), uint32_t(PA.value()));
- Alignment = llvm::PowerOf2Floor(gcd);
+ Alignment = llvm::bit_floor(gcd);
} else {
Alignment = V.getPointerAlignment(DL).value();
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a28099d8ba7d..2c8fd17a9bdb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4932,7 +4932,7 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
// the memory accesses that is most restrictive (involved in the smallest
// dependence distance).
unsigned MaxSafeElements =
- PowerOf2Floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
+ llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
@@ -5187,7 +5187,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
// Ensure MaxVF is a power of 2; the dependence distance bound may not be.
// Note that both WidestRegister and WidestType may not be a powers of 2.
auto MaxVectorElementCount = ElementCount::get(
- PowerOf2Floor(WidestRegister.getKnownMinValue() / WidestType),
+ llvm::bit_floor(WidestRegister.getKnownMinValue() / WidestType),
ComputeScalableMaxVF);
MaxVectorElementCount = MinVF(MaxVectorElementCount, MaxSafeVF);
LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
@@ -5214,7 +5214,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
// power of two which doesn't exceed TC.
// If MaxVectorElementCount is scalable, we only fall back on a fixed VF
// when the TC is less than or equal to the known number of lanes.
- auto ClampedConstTripCount = PowerOf2Floor(ConstTripCount);
+ auto ClampedConstTripCount = llvm::bit_floor(ConstTripCount);
LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
"exceeding the constant trip count: "
<< ClampedConstTripCount << "\n");
@@ -5228,7 +5228,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
if (MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
TTI.shouldMaximizeVectorBandwidth(RegKind))) {
auto MaxVectorElementCountMaxBW = ElementCount::get(
- PowerOf2Floor(WidestRegister.getKnownMinValue() / SmallestType),
+ llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
ComputeScalableMaxVF);
MaxVectorElementCountMaxBW = MinVF(MaxVectorElementCountMaxBW, MaxSafeVF);
@@ -5750,12 +5750,12 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
- unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
+ unsigned TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs) /
+ MaxLocalUsers);
// Don't count the induction variable as interleaved.
if (EnableIndVarRegisterHeur) {
- TmpIC =
- PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) /
- std::max(1U, (MaxLocalUsers - 1)));
+ TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs - 1) /
+ std::max(1U, (MaxLocalUsers - 1)));
}
IC = std::min(IC, TmpIC);
@@ -5834,8 +5834,8 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and interleave until the cost of the
// loop overhead is about 5% of the cost of the loop.
- unsigned SmallIC = std::min(
- IC, (unsigned)PowerOf2Floor(SmallLoopCost / *LoopCost.getValue()));
+ unsigned SmallIC = std::min(IC, (unsigned)llvm::bit_floor<uint64_t>(
+ SmallLoopCost / *LoopCost.getValue()));
// Interleave until store/load ports (estimated by max interleave count) are
// saturated.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e3eb6b1804e7..60333aa28a0e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11536,7 +11536,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned MaxVecRegSize = R.getMaxVecRegSize();
unsigned EltSize = R.getVectorElementSize(Operands[0]);
- unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);
+ unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
MaxElts);
@@ -11663,7 +11663,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = R.getMinVF(Sz);
- unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+ unsigned MaxVF = std::max<unsigned>(llvm::bit_floor(VL.size()), MinVF);
MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
if (MaxVF < 2) {
R.getORE()->emit([&]() {
@@ -12525,10 +12525,11 @@ class HorizontalReduction {
unsigned MaxVecRegSize = V.getMaxVecRegSize();
unsigned EltSize = V.getVectorElementSize(Candidates[0]);
- unsigned MaxElts = RegMaxNumber * PowerOf2Floor(MaxVecRegSize / EltSize);
+ unsigned MaxElts =
+ RegMaxNumber * llvm::bit_floor(MaxVecRegSize / EltSize);
unsigned ReduxWidth = std::min<unsigned>(
- PowerOf2Floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
+ llvm::bit_floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
unsigned Start = 0;
unsigned Pos = Start;
// Restarts vectorization attempt with lower vector factor.
@@ -12749,7 +12750,7 @@ class HorizontalReduction {
}
Pos += ReduxWidth;
Start = Pos;
- ReduxWidth = PowerOf2Floor(NumReducedVals - Pos);
+ ReduxWidth = llvm::bit_floor(NumReducedVals - Pos);
}
}
if (VectorizedTree) {