[llvm] [Vectorize] Vectorization for __builtin_prefetch (PR #66160)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 30 05:36:56 PST 2023


github-actions[bot] wrote:



:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff 062058ef36c3a5a41f5c2ad2fd1a53f7a099e956 3a8518a6e4d49054b34a3d6515319e2ec237f9b2 -- llvm/include/llvm/Analysis/TargetTransformInfo.h llvm/include/llvm/Analysis/TargetTransformInfoImpl.h llvm/include/llvm/CodeGen/BasicTTIImpl.h llvm/include/llvm/IR/IRBuilder.h llvm/include/llvm/IR/IntrinsicInst.h llvm/lib/Analysis/TargetTransformInfo.cpp llvm/lib/Analysis/VectorUtils.cpp llvm/lib/IR/IRBuilder.cpp llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPlan.h llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
``````````

</details>
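
If you want to apply the fixes locally rather than just inspect them, one option (a sketch, not part of the bot's message) is to run clang-format in place on the flagged files. Note that this reformats entire files, whereas git-clang-format only touches the lines changed in the commit range above.

``````````bash
# Reformat one of the flagged files in place; repeat for the other files
# listed in the command above. Assumes clang-format is on PATH and picks up
# the repository's .clang-format configuration.
clang-format -i llvm/lib/IR/IRBuilder.cpp
``````````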

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index d9af917754f..ec16dd287ca 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1568,9 +1568,10 @@ public:
       bool VarMask = !isa<Constant>(Mask);
       Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
       auto *MaskVT = cast<VectorType>(Mask->getType());
-      auto *PsudoDataTy = MaskVT->getWithNewBitWidth(Alignment.value()*8);
-      return thisT()->getGatherScatterOpCost(Instruction::Call, PsudoDataTy, Args[0],
-                                             VarMask, Alignment, CostKind, I);
+      auto *PsudoDataTy = MaskVT->getWithNewBitWidth(Alignment.value() * 8);
+      return thisT()->getGatherScatterOpCost(Instruction::Call, PsudoDataTy,
+                                             Args[0], VarMask, Alignment,
+                                             CostKind, I);
     }
     case Intrinsic::experimental_stepvector: {
       if (isa<ScalableVectorType>(RetTy))
@@ -1893,8 +1894,8 @@ public:
       auto *MaskVT = cast<VectorType>(ICA.getArgTypes()[4]);
       Type *PsudoTy = MaskVT->getWithNewBitWidth(32);
       Align TyAlign = thisT()->DL.getABITypeAlign(PsudoTy);
-      return thisT()->getMaskedMemoryOpCost(Instruction::Call, PsudoTy, TyAlign, 0,
-                                            CostKind);
+      return thisT()->getMaskedMemoryOpCost(Instruction::Call, PsudoTy, TyAlign,
+                                            0, CostKind);
     }
     case Intrinsic::vector_reduce_add:
       return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index bfe0ccb8125..9cf9ba49b8d 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -817,7 +817,8 @@ public:
 
   /// Create a call to Masked Gather Prefetch intrinsic
   CallInst *CreateMaskedGatherPrefetch(Value *Ptrs, Value *ElemSize,
-                                       Value *Mask = nullptr, Value *RW = nullptr,
+                                       Value *Mask = nullptr,
+                                       Value *RW = nullptr,
                                        Value *Locality = nullptr,
                                        const Twine &Name = "");
 
@@ -2091,8 +2092,7 @@ public:
     return CreateCast(Instruction::AddrSpaceCast, V, DestTy, Name);
   }
 
-  Value *CreateZExtOrBitCast(Value *V, Type *DestTy,
-                             const Twine &Name = "") {
+  Value *CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
     if (auto *VC = dyn_cast<Constant>(V))
@@ -2100,8 +2100,7 @@ public:
     return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
   }
 
-  Value *CreateSExtOrBitCast(Value *V, Type *DestTy,
-                             const Twine &Name = "") {
+  Value *CreateSExtOrBitCast(Value *V, Type *DestTy, const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
     if (auto *VC = dyn_cast<Constant>(V))
@@ -2109,8 +2108,7 @@ public:
     return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
   }
 
-  Value *CreateTruncOrBitCast(Value *V, Type *DestTy,
-                              const Twine &Name = "") {
+  Value *CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
     if (auto *VC = dyn_cast<Constant>(V))
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index a81592aca6d..217cbca666e 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1373,7 +1373,8 @@ inline Type *getPrefetchPseudoType(Value *I) {
   // Get type for the following pattern
   // ex) %1 = add nuw nsw i64 %indvars.iv, 8
   //     %arrayidx = getelementptr inbounds double, ptr %b, i64 %1
-  //     tail call void @llvm.prefetch.p0(ptr nonnull %arrayidx, i32 0, i32 3, i32 1)
+  //     tail call void @llvm.prefetch.p0(ptr nonnull %arrayidx, i32 0, i32 3,
+  //     i32 1)
   auto *GEP = dyn_cast<GetElementPtrInst>(Prefetch->getPointerOperand());
   if (GEP) {
     auto *ElemTy = GEP->getSourceElementType();
@@ -1400,10 +1401,11 @@ inline Type *getPrefetchPseudoType(Value *I) {
 inline Align getPrefetchPseudoAlignment(Value *I) {
   assert(isa<PrefetchInst>(I) && "Expected Prefetch instruction");
   auto *Ty = getPrefetchPseudoType(I);
-  return Ty? Align(Ty->getScalarSizeInBits()>>3) : Align(1ULL);
+  return Ty ? Align(Ty->getScalarSizeInBits() >> 3) : Align(1ULL);
 }
 
-/// A helper function that returns the alignment of load/store/prefetch instruction.
+/// A helper function that returns the alignment of load/store/prefetch
+/// instruction.
 inline Align getLdStPfAlignment(Value *I) {
   if (isa<PrefetchInst>(I))
     return getPrefetchPseudoAlignment(I);
@@ -1430,7 +1432,8 @@ inline unsigned getLdStPfAddressSpace(Value *I) {
   return getLoadStoreAddressSpace(I);
 }
 
-/// A helper function that returns the type of a load/store/prefetch instruction.
+/// A helper function that returns the type of a load/store/prefetch
+/// instruction.
 inline Type *getLdStPfType(Value *I) {
   if (isa<PrefetchInst>(I))
     return getPrefetchPseudoType(I);
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index fc62ae568f6..2606a61e681 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -547,8 +547,7 @@ CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
 /// \p RW       - Read or Write
 /// \p Locality - Cache Level
 /// \p Name     - name of the result variable
-CallInst *IRBuilderBase::CreateMaskedPrefetch(Value *Ptr,
-                                              Value *ElemSize,
+CallInst *IRBuilderBase::CreateMaskedPrefetch(Value *Ptr, Value *ElemSize,
                                               Value *Mask, Value *RW,
                                               Value *Locality,
                                               const Twine &Name) {
@@ -556,8 +555,8 @@ CallInst *IRBuilderBase::CreateMaskedPrefetch(Value *Ptr,
   assert(Mask && "Mask should not be all-ones (null)");
   Type *OverloadedTypes[] = {PtrTy, Mask->getType()};
   Value *Ops[] = {Ptr, ElemSize, RW, Locality, Mask};
-  return CreateMaskedIntrinsic(Intrinsic::masked_prefetch, Ops,
-                               OverloadedTypes, Name);
+  return CreateMaskedIntrinsic(Intrinsic::masked_prefetch, Ops, OverloadedTypes,
+                               Name);
 }
 
 /// Create a call to a Masked intrinsic, with given intrinsic Id,
@@ -674,7 +673,8 @@ CallInst *IRBuilderBase::CreateMaskedCompressStore(Value *Val, Value *Ptr,
 /// \p RW       - Read or Write
 /// \p Locality - Cache Level
 /// \p Name     - name of the result variable
-CallInst *IRBuilderBase::CreateMaskedGatherPrefetch(Value *Ptrs, Value *ElemSize,
+CallInst *IRBuilderBase::CreateMaskedGatherPrefetch(Value *Ptrs,
+                                                    Value *ElemSize,
                                                     Value *Mask, Value *RW,
                                                     Value *Locality,
                                                     const Twine &Name) {
@@ -690,8 +690,8 @@ CallInst *IRBuilderBase::CreateMaskedGatherPrefetch(Value *Ptrs, Value *ElemSize
 
   // We specify only one type when we create this intrinsic. Types of other
   // arguments are derived from this type.
-  return CreateMaskedIntrinsic(Intrinsic::masked_gather_prefetch, Ops, OverloadedTypes,
-                               Name);
+  return CreateMaskedIntrinsic(Intrinsic::masked_gather_prefetch, Ops,
+                               OverloadedTypes, Name);
 }
 
 template <typename T0>
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6386df48e33..ad1f16623c3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1470,7 +1470,8 @@ public:
 
   /// Returns true if the target machine supports masked prefetch operation
   /// for the given \p DataType and kind of access to \p Ptr.
-  bool isLegalMaskedPrefetch(Type *DataType, Value *Ptr, Align Alignment) const {
+  bool isLegalMaskedPrefetch(Type *DataType, Value *Ptr,
+                             Align Alignment) const {
     return Legal->isConsecutivePtr(DataType, Ptr) &&
            TTI.isLegalMaskedPrefetch(DataType, Alignment);
   }
@@ -3852,8 +3853,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
           TTI->preferPredicatedReductionSelect(
               RdxDesc.getOpcode(), PhiTy,
               TargetTransformInfo::ReductionFlags())) {
-        auto *VecRdxPhi =
-            cast<PHINode>(State.get(PhiR, Part));
+        auto *VecRdxPhi = cast<PHINode>(State.get(PhiR, Part));
         VecRdxPhi->setIncomingValueForBlock(VectorLoopLatch, Sel);
       }
     }
@@ -4505,7 +4505,8 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
 bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
     Instruction *I, ElementCount VF) {
   // Get and ensure we have a valid memory instruction.
-  assert((isa<LoadInst, StoreInst, PrefetchInst>(I)) && "Invalid memory instruction");
+  assert((isa<LoadInst, StoreInst, PrefetchInst>(I)) &&
+         "Invalid memory instruction");
 
   auto *Ptr = getLdStPfPointerOperand(I);
   auto *ScalarTy = getLdStPfType(I);
@@ -8358,18 +8359,17 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
 
   // Is it beneficial to perform intrinsic call compared to lib call?
   bool ShouldUseVectorIntrinsic =
-      ID && LoopVectorizationPlanner::getDecisionAndClampRange(
-                [&](ElementCount VF) -> bool {
-                  Function *Variant;
-                  // Is it beneficial to perform intrinsic call compared to lib
-                  // call?
-                  InstructionCost CallCost =
-                      CM.getVectorCallCost(CI, VF, &Variant);
-                  InstructionCost IntrinsicCost =
-                      CM.getVectorIntrinsicCost(CI, VF);
-                  return IntrinsicCost <= CallCost;
-                },
-                Range);
+      ID &&
+      LoopVectorizationPlanner::getDecisionAndClampRange(
+          [&](ElementCount VF) -> bool {
+            Function *Variant;
+            // Is it beneficial to perform intrinsic call compared to lib
+            // call?
+            InstructionCost CallCost = CM.getVectorCallCost(CI, VF, &Variant);
+            InstructionCost IntrinsicCost = CM.getVectorIntrinsicCost(CI, VF);
+            return IntrinsicCost <= CallCost;
+          },
+          Range);
   if (ShouldUseVectorIntrinsic)
     return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID);
 
@@ -8755,8 +8755,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
     LaneMaskPhi->addOperand(ALM);
 
     if (Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
-      // Do the increment of the canonical IV after the active.lane.mask, because
-      // that value is still based off %CanonicalIVPHI
+      // Do the increment of the canonical IV after the active.lane.mask,
+      // because that value is still based off %CanonicalIVPHI
       EB->appendRecipe(CanonicalIVIncrement);
     }
 
@@ -9611,14 +9611,13 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
         auto *VecPtr =
             CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
         if (isMaskRequired)
-          NewPF = Builder.CreateMaskedPrefetch(VecPtr, ElemSize,
-                                               BlockInMaskParts[Part],
-                                               RW, Locality);
+          NewPF = Builder.CreateMaskedPrefetch(
+              VecPtr, ElemSize, BlockInMaskParts[Part], RW, Locality);
         else {
           auto *MaskPart = Constant::getAllOnesValue(
               VectorType::get(Type::getInt1Ty(DataTy->getContext()), DataTy));
-          NewPF = Builder.CreateMaskedPrefetch(VecPtr, ElemSize,
-                                               MaskPart, RW, Locality);
+          NewPF = Builder.CreateMaskedPrefetch(VecPtr, ElemSize, MaskPart, RW,
+                                               Locality);
         }
       }
       State.addMetadata(NewPF, PF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ae9d70b80c1..c6ed25adfaa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1949,8 +1949,9 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
   }
 
   bool isMasked() const {
-    return isPrefetch() ? getNumOperands() == 5 :
-              isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
+    return isPrefetch() ? getNumOperands() == 5
+           : isStore()  ? getNumOperands() == 3
+                        : getNumOperands() == 2;
   }
 
 public:
@@ -1972,8 +1973,8 @@ public:
     setMask(Mask);
   }
 
-  VPWidenMemoryInstructionRecipe(PrefetchInst &Prefetch, VPValue *Addr, VPValue *Mask,
-                                 bool Consecutive, bool Reverse)
+  VPWidenMemoryInstructionRecipe(PrefetchInst &Prefetch, VPValue *Addr,
+                                 VPValue *Mask, bool Consecutive, bool Reverse)
       : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
         Ingredient(Prefetch), Consecutive(Consecutive), Reverse(Reverse) {
     assert((Consecutive || !Reverse) && "Reverse implies consecutive");

``````````

</details>
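
Alternatively (again a sketch, not part of the original bot output), the diff above can be saved to a file and applied directly from the root of the llvm-project checkout:

``````````bash
# Save the clang-format diff shown above as clang-format-fixes.patch, then
# apply it to the working tree.
git apply clang-format-fixes.patch
``````````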


https://github.com/llvm/llvm-project/pull/66160

