[llvm] [SystemZ] Provide improved cost estimates (PR #83873)

Dominik Steenken via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 11 01:25:26 PDT 2024


================
@@ -1284,17 +1286,43 @@ InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
   return NumVectorMemOps + NumPermutes;
 }
 
-static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
+static int
+getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+                            const SmallVectorImpl<Type *> &ParamTys) {
   if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
     return getNumVectorRegs(RetTy); // VPERM
+
+  if (ID == Intrinsic::vector_reduce_add) {
+    // Retrieve number and size of elements for the vector op.
+    auto *VTy = cast<FixedVectorType>(ParamTys.front());
+    unsigned NumElements = VTy->getNumElements();
+    unsigned ScalarSize = VTy->getScalarSizeInBits();
+    // For scalar sizes >128 bits, we fall back to the generic cost estimate.
+    if (ScalarSize > SystemZ::VectorBits)
+      return -1;
+    // A single vector register can hold this many elements.
+    unsigned MaxElemsPerVector = SystemZ::VectorBits / ScalarSize;
+    // This many vector regs are needed to represent the input elements (V).
+    unsigned VectorRegsNeeded = getNumVectorRegs(VTy);
+    // This many instructions are needed for the final sum of vector elems (S).
+    unsigned LastVectorHandling =
+        2 * Log2_32_Ceil(std::min(NumElements, MaxElemsPerVector));
+    // We use vector adds to create a sum vector, which takes
+    // V/2 + V/4 + ... = V - 1 operations.
+    // Then, we need S operations to sum up the elements of that sum vector,
+    // for a total of V + S - 1 operations.
+    int Cost = VectorRegsNeeded + LastVectorHandling - 1;
+    assert(Cost > 0 && "Predicted cost of vector.reduce.add must be > 0");
----------------
dominik-steenken wrote:

done

https://github.com/llvm/llvm-project/pull/83873


More information about the llvm-commits mailing list