[llvm] [SystemZ] Provide improved cost estimates (PR #83873)
Dominik Steenken via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 11 01:25:26 PDT 2024
================
@@ -1284,17 +1286,43 @@ InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
return NumVectorMemOps + NumPermutes;
}
-static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
+static int
+getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ const SmallVectorImpl<Type *> &ParamTys) {
if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
return getNumVectorRegs(RetTy); // VPERM
+
+ if (ID == Intrinsic::vector_reduce_add) {
+ // Retrieve number and size of elements for the vector op.
+ auto *VTy = cast<FixedVectorType>(ParamTys.front());
+ unsigned NumElements = VTy->getNumElements();
+ unsigned ScalarSize = VTy->getScalarSizeInBits();
+ // For scalar sizes >128 bits, we fall back to the generic cost estimate.
+ if (ScalarSize > SystemZ::VectorBits)
+ return -1;
+ // A single vector register can hold this many elements.
+ unsigned MaxElemsPerVector = SystemZ::VectorBits / ScalarSize;
+ // This many vector regs are needed to represent the input elements (V).
+ unsigned VectorRegsNeeded = getNumVectorRegs(VTy);
+ // This many instructions are needed for the final sum of vector elems (S).
+ unsigned LastVectorHandling =
+ 2 * Log2_32_Ceil(std::min(NumElements, MaxElemsPerVector));
+ // We use vector adds to create a sum vector, which takes
+ // V/2 + V/4 + ... = V - 1 operations.
+ // Then, we need S operations to sum up the elements of that sum vector,
+ // for a total of V + S - 1 operations.
+ int Cost = VectorRegsNeeded + LastVectorHandling - 1;
+ assert(Cost > 0 && "Predicted cost of vector.reduce.add must be > 0");
----------------
dominik-steenken wrote:
done
https://github.com/llvm/llvm-project/pull/83873
More information about the llvm-commits mailing list