[llvm] [LoopVectorize] Add cost of generating tail-folding mask to the loop (PR #90191)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon May 20 05:26:01 PDT 2024


================
@@ -5942,6 +5945,48 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
   return Discount;
 }
 
+InstructionCost
+LoopVectorizationCostModel::getTailFoldMaskCost(ElementCount VF) {
+  if (VF.isScalar())
+    return 0;
+
+  InstructionCost MaskCost;
+  Type *IndTy = Legal->getWidestInductionType();
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TailFoldingStyle Style = getTailFoldingStyle();
+  LLVMContext &Context = TheLoop->getHeader()->getContext();
+  Type *I1Ty = IntegerType::getInt1Ty(Context);
+
+  if (Style == TailFoldingStyle::DataWithEVL) {
+    Type *I32Ty = IntegerType::getInt32Ty(Context);
+    IntrinsicCostAttributes Attrs(
+        Intrinsic::experimental_get_vector_length, I32Ty,
+        {PoisonValue::get(IndTy), PoisonValue::get(I32Ty),
+         PoisonValue::get(I1Ty)});
+    MaskCost = TTI.getIntrinsicInstrCost(Attrs, CostKind);
+    MaskCost += TTI.getArithmeticInstrCost(Instruction::Sub, IndTy, CostKind);
+    return MaskCost;
+  }
+
+  VectorType *RetTy = VectorType::get(I1Ty, VF);
+  if (useActiveLaneMask(Style)) {
+    IntrinsicCostAttributes Attrs(
+        Intrinsic::get_active_lane_mask, RetTy,
+        {PoisonValue::get(IndTy), PoisonValue::get(IndTy)});
+    MaskCost = TTI.getIntrinsicInstrCost(Attrs, CostKind);
+  } else {
+    // This is just a stepvector, added to a splat of the current IV, followed
+    // by a vector comparison with a splat of the trip count. Since the
+    // stepvector is loop invariant it will be hoisted out so we can ignore it.
+    // This just leaves us with an add and an icmp.
+    VectorType *VecTy = VectorType::get(IndTy, VF);
+    MaskCost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy, CostKind);
----------------
fhahn wrote:

Is this entirely accurate? We might also need to broadcast the canonical IV, or there may be no increment needed if we can re-use an existing widened IV. Unfortunately, I am not sure if we can query this accurately at this point.

https://github.com/llvm/llvm-project/pull/90191


More information about the llvm-commits mailing list