[llvm] [Vectorize] Vectorization for __builtin_prefetch (PR #66160)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 05:36:56 PST 2023
github-actions[bot] wrote:
Warning: the C/C++ code formatter clang-format found issues in your code.
You can test this locally with the following command:
``````````bash
git-clang-format --diff 062058ef36c3a5a41f5c2ad2fd1a53f7a099e956 3a8518a6e4d49054b34a3d6515319e2ec237f9b2 -- llvm/include/llvm/Analysis/TargetTransformInfo.h llvm/include/llvm/Analysis/TargetTransformInfoImpl.h llvm/include/llvm/CodeGen/BasicTTIImpl.h llvm/include/llvm/IR/IRBuilder.h llvm/include/llvm/IR/IntrinsicInst.h llvm/lib/Analysis/TargetTransformInfo.cpp llvm/lib/Analysis/VectorUtils.cpp llvm/lib/IR/IRBuilder.cpp llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPlan.h llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
``````````
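If the suggested formatting looks right, one way to apply it locally (a sketch, assuming the same two commits are checked out and the diff applies cleanly from the repository root) is to pipe the formatter's diff output straight into git apply instead of only viewing it:

``````````bash
# Regenerate the formatting diff for the same commit range and apply it
# to the working tree (run from the repository root).
git-clang-format --diff 062058ef36c3a5a41f5c2ad2fd1a53f7a099e956 \
    3a8518a6e4d49054b34a3d6515319e2ec237f9b2 | git apply
``````````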
View the diff from clang-format here:
``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index d9af917754f..ec16dd287ca 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1568,9 +1568,10 @@ public:
bool VarMask = !isa<Constant>(Mask);
Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
auto *MaskVT = cast<VectorType>(Mask->getType());
- auto *PsudoDataTy = MaskVT->getWithNewBitWidth(Alignment.value()*8);
- return thisT()->getGatherScatterOpCost(Instruction::Call, PsudoDataTy, Args[0],
- VarMask, Alignment, CostKind, I);
+ auto *PsudoDataTy = MaskVT->getWithNewBitWidth(Alignment.value() * 8);
+ return thisT()->getGatherScatterOpCost(Instruction::Call, PsudoDataTy,
+ Args[0], VarMask, Alignment,
+ CostKind, I);
}
case Intrinsic::experimental_stepvector: {
if (isa<ScalableVectorType>(RetTy))
@@ -1893,8 +1894,8 @@ public:
auto *MaskVT = cast<VectorType>(ICA.getArgTypes()[4]);
Type *PsudoTy = MaskVT->getWithNewBitWidth(32);
Align TyAlign = thisT()->DL.getABITypeAlign(PsudoTy);
- return thisT()->getMaskedMemoryOpCost(Instruction::Call, PsudoTy, TyAlign, 0,
- CostKind);
+ return thisT()->getMaskedMemoryOpCost(Instruction::Call, PsudoTy, TyAlign,
+ 0, CostKind);
}
case Intrinsic::vector_reduce_add:
return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index bfe0ccb8125..9cf9ba49b8d 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -817,7 +817,8 @@ public:
/// Create a call to Masked Gather Prefetch intrinsic
CallInst *CreateMaskedGatherPrefetch(Value *Ptrs, Value *ElemSize,
- Value *Mask = nullptr, Value *RW = nullptr,
+ Value *Mask = nullptr,
+ Value *RW = nullptr,
Value *Locality = nullptr,
const Twine &Name = "");
@@ -2091,8 +2092,7 @@ public:
return CreateCast(Instruction::AddrSpaceCast, V, DestTy, Name);
}
- Value *CreateZExtOrBitCast(Value *V, Type *DestTy,
- const Twine &Name = "") {
+ Value *CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name = "") {
if (V->getType() == DestTy)
return V;
if (auto *VC = dyn_cast<Constant>(V))
@@ -2100,8 +2100,7 @@ public:
return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
}
- Value *CreateSExtOrBitCast(Value *V, Type *DestTy,
- const Twine &Name = "") {
+ Value *CreateSExtOrBitCast(Value *V, Type *DestTy, const Twine &Name = "") {
if (V->getType() == DestTy)
return V;
if (auto *VC = dyn_cast<Constant>(V))
@@ -2109,8 +2108,7 @@ public:
return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
}
- Value *CreateTruncOrBitCast(Value *V, Type *DestTy,
- const Twine &Name = "") {
+ Value *CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name = "") {
if (V->getType() == DestTy)
return V;
if (auto *VC = dyn_cast<Constant>(V))
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index a81592aca6d..217cbca666e 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1373,7 +1373,8 @@ inline Type *getPrefetchPseudoType(Value *I) {
// Get type for the following pattern
// ex) %1 = add nuw nsw i64 %indvars.iv, 8
// %arrayidx = getelementptr inbounds double, ptr %b, i64 %1
- // tail call void @llvm.prefetch.p0(ptr nonnull %arrayidx, i32 0, i32 3, i32 1)
+ // tail call void @llvm.prefetch.p0(ptr nonnull %arrayidx, i32 0, i32 3,
+ // i32 1)
auto *GEP = dyn_cast<GetElementPtrInst>(Prefetch->getPointerOperand());
if (GEP) {
auto *ElemTy = GEP->getSourceElementType();
@@ -1400,10 +1401,11 @@ inline Type *getPrefetchPseudoType(Value *I) {
inline Align getPrefetchPseudoAlignment(Value *I) {
assert(isa<PrefetchInst>(I) && "Expected Prefetch instruction");
auto *Ty = getPrefetchPseudoType(I);
- return Ty? Align(Ty->getScalarSizeInBits()>>3) : Align(1ULL);
+ return Ty ? Align(Ty->getScalarSizeInBits() >> 3) : Align(1ULL);
}
-/// A helper function that returns the alignment of load/store/prefetch instruction.
+/// A helper function that returns the alignment of load/store/prefetch
+/// instruction.
inline Align getLdStPfAlignment(Value *I) {
if (isa<PrefetchInst>(I))
return getPrefetchPseudoAlignment(I);
@@ -1430,7 +1432,8 @@ inline unsigned getLdStPfAddressSpace(Value *I) {
return getLoadStoreAddressSpace(I);
}
-/// A helper function that returns the type of a load/store/prefetch instruction.
+/// A helper function that returns the type of a load/store/prefetch
+/// instruction.
inline Type *getLdStPfType(Value *I) {
if (isa<PrefetchInst>(I))
return getPrefetchPseudoType(I);
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index fc62ae568f6..2606a61e681 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -547,8 +547,7 @@ CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
/// \p RW - Read or Write
/// \p Locality - Cache Level
/// \p Name - name of the result variable
-CallInst *IRBuilderBase::CreateMaskedPrefetch(Value *Ptr,
- Value *ElemSize,
+CallInst *IRBuilderBase::CreateMaskedPrefetch(Value *Ptr, Value *ElemSize,
Value *Mask, Value *RW,
Value *Locality,
const Twine &Name) {
@@ -556,8 +555,8 @@ CallInst *IRBuilderBase::CreateMaskedPrefetch(Value *Ptr,
assert(Mask && "Mask should not be all-ones (null)");
Type *OverloadedTypes[] = {PtrTy, Mask->getType()};
Value *Ops[] = {Ptr, ElemSize, RW, Locality, Mask};
- return CreateMaskedIntrinsic(Intrinsic::masked_prefetch, Ops,
- OverloadedTypes, Name);
+ return CreateMaskedIntrinsic(Intrinsic::masked_prefetch, Ops, OverloadedTypes,
+ Name);
}
/// Create a call to a Masked intrinsic, with given intrinsic Id,
@@ -674,7 +673,8 @@ CallInst *IRBuilderBase::CreateMaskedCompressStore(Value *Val, Value *Ptr,
/// \p RW - Read or Write
/// \p Locality - Cache Level
/// \p Name - name of the result variable
-CallInst *IRBuilderBase::CreateMaskedGatherPrefetch(Value *Ptrs, Value *ElemSize,
+CallInst *IRBuilderBase::CreateMaskedGatherPrefetch(Value *Ptrs,
+ Value *ElemSize,
Value *Mask, Value *RW,
Value *Locality,
const Twine &Name) {
@@ -690,8 +690,8 @@ CallInst *IRBuilderBase::CreateMaskedGatherPrefetch(Value *Ptrs, Value *ElemSize
// We specify only one type when we create this intrinsic. Types of other
// arguments are derived from this type.
- return CreateMaskedIntrinsic(Intrinsic::masked_gather_prefetch, Ops, OverloadedTypes,
- Name);
+ return CreateMaskedIntrinsic(Intrinsic::masked_gather_prefetch, Ops,
+ OverloadedTypes, Name);
}
template <typename T0>
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6386df48e33..ad1f16623c3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1470,7 +1470,8 @@ public:
/// Returns true if the target machine supports masked prefetch operation
/// for the given \p DataType and kind of access to \p Ptr.
- bool isLegalMaskedPrefetch(Type *DataType, Value *Ptr, Align Alignment) const {
+ bool isLegalMaskedPrefetch(Type *DataType, Value *Ptr,
+ Align Alignment) const {
return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedPrefetch(DataType, Alignment);
}
@@ -3852,8 +3853,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
TTI->preferPredicatedReductionSelect(
RdxDesc.getOpcode(), PhiTy,
TargetTransformInfo::ReductionFlags())) {
- auto *VecRdxPhi =
- cast<PHINode>(State.get(PhiR, Part));
+ auto *VecRdxPhi = cast<PHINode>(State.get(PhiR, Part));
VecRdxPhi->setIncomingValueForBlock(VectorLoopLatch, Sel);
}
}
@@ -4505,7 +4505,8 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
Instruction *I, ElementCount VF) {
// Get and ensure we have a valid memory instruction.
- assert((isa<LoadInst, StoreInst, PrefetchInst>(I)) && "Invalid memory instruction");
+ assert((isa<LoadInst, StoreInst, PrefetchInst>(I)) &&
+ "Invalid memory instruction");
auto *Ptr = getLdStPfPointerOperand(I);
auto *ScalarTy = getLdStPfType(I);
@@ -8358,18 +8359,17 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
// Is it beneficial to perform intrinsic call compared to lib call?
bool ShouldUseVectorIntrinsic =
- ID && LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) -> bool {
- Function *Variant;
- // Is it beneficial to perform intrinsic call compared to lib
- // call?
- InstructionCost CallCost =
- CM.getVectorCallCost(CI, VF, &Variant);
- InstructionCost IntrinsicCost =
- CM.getVectorIntrinsicCost(CI, VF);
- return IntrinsicCost <= CallCost;
- },
- Range);
+ ID &&
+ LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) -> bool {
+ Function *Variant;
+ // Is it beneficial to perform intrinsic call compared to lib
+ // call?
+ InstructionCost CallCost = CM.getVectorCallCost(CI, VF, &Variant);
+ InstructionCost IntrinsicCost = CM.getVectorIntrinsicCost(CI, VF);
+ return IntrinsicCost <= CallCost;
+ },
+ Range);
if (ShouldUseVectorIntrinsic)
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID);
@@ -8755,8 +8755,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
LaneMaskPhi->addOperand(ALM);
if (Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
- // Do the increment of the canonical IV after the active.lane.mask, because
- // that value is still based off %CanonicalIVPHI
+ // Do the increment of the canonical IV after the active.lane.mask,
+ // because that value is still based off %CanonicalIVPHI
EB->appendRecipe(CanonicalIVIncrement);
}
@@ -9611,14 +9611,13 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
auto *VecPtr =
CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
if (isMaskRequired)
- NewPF = Builder.CreateMaskedPrefetch(VecPtr, ElemSize,
- BlockInMaskParts[Part],
- RW, Locality);
+ NewPF = Builder.CreateMaskedPrefetch(
+ VecPtr, ElemSize, BlockInMaskParts[Part], RW, Locality);
else {
auto *MaskPart = Constant::getAllOnesValue(
VectorType::get(Type::getInt1Ty(DataTy->getContext()), DataTy));
- NewPF = Builder.CreateMaskedPrefetch(VecPtr, ElemSize,
- MaskPart, RW, Locality);
+ NewPF = Builder.CreateMaskedPrefetch(VecPtr, ElemSize, MaskPart, RW,
+ Locality);
}
}
State.addMetadata(NewPF, PF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ae9d70b80c1..c6ed25adfaa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1949,8 +1949,9 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
}
bool isMasked() const {
- return isPrefetch() ? getNumOperands() == 5 :
- isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
+ return isPrefetch() ? getNumOperands() == 5
+ : isStore() ? getNumOperands() == 3
+ : getNumOperands() == 2;
}
public:
@@ -1972,8 +1973,8 @@ public:
setMask(Mask);
}
- VPWidenMemoryInstructionRecipe(PrefetchInst &Prefetch, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse)
+ VPWidenMemoryInstructionRecipe(PrefetchInst &Prefetch, VPValue *Addr,
+ VPValue *Mask, bool Consecutive, bool Reverse)
: VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
Ingredient(Prefetch), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
``````````
https://github.com/llvm/llvm-project/pull/66160