[llvm] Add proper cost computation for llvm.vector.reduce.add (PR #77764)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 11 05:19:20 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-analysis

Author: None (dominik-steenken)

<details>
<summary>Changes</summary>

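WIP (work in progress). This patch extends the SystemZ `getVectorIntrinsicInstrCost` helper to return explicit costs for `llvm.vector.reduce.add` on fixed vectors of 2, 4, 8 and 16 elements (3, 5, 7 and 11 respectively); other element counts fall back to the base implementation. It also adds a cost-model test, `reduce-add.ll`. The patch still contains unconditional `dbgs()` debugging output.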

---
Full diff: https://github.com/llvm/llvm-project/pull/77764.diff


2 Files Affected:

- (modified) llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp (+14-2) 
- (added) llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll (+32) 


``````````diff
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 1f97e0f761c04d..d8a0923ab980b2 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -1244,9 +1244,21 @@ InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
   return NumVectorMemOps + NumPermutes;
 }
 
-static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
+static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, const SmallVectorImpl<Type *>& ParamTys) {
+  dbgs() << "getVectorIntrinsicInstrCost entry, ID == " << ID << ", name = " << Intrinsic::getName(ID) << ", compare with " << Intrinsic::getName(Intrinsic::vector_reduce_add) << "(" << Intrinsic::vector_reduce_add << ")\n";
   if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
     return getNumVectorRegs(RetTy); // VPERM
+  else if (ID == Intrinsic::vector_reduce_add) {
+    dbgs() << "getVectorIntrinsicInstrCost is reduce_add intrinsic\n";
+    auto *VTy = cast<FixedVectorType>(ParamTys.front());
+    switch (VTy->getNumElements()) {
+      default: return -1;
+      case 2: return 3;
+      case 4: return 5;
+      case 8: return 7;
+      case 16: return 11;
+    }
+  }
   return -1;
 }
 
@@ -1254,7 +1266,7 @@ InstructionCost
 SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                       TTI::TargetCostKind CostKind) {
   InstructionCost Cost =
-      getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
+      getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType(), ICA.getArgTypes());
   if (Cost != -1)
     return Cost;
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
diff --git a/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll b/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll
new file mode 100644
index 00000000000000..b62feb50212d50
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -mtriple=systemz-unknown -mcpu=z13 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+
+define void @reduce(ptr %src, ptr %dst) {
+; CHECK-LABEL: 'reduce'
+; CHECK:  Cost Model: Found an estimated cost of 3 for instruction: %R2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %V2)
+; CHECK:  Cost Model: Found an estimated cost of 5 for instruction: %R4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %V4)
+; CHECK:  Cost Model: Found an estimated cost of 7 for instruction: %R8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %V8)
+; CHECK:  Cost Model: Found an estimated cost of 11 for instruction: %R16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %V16)
+;
+  %V2 = load <2 x i32>, ptr %src, align 8
+  %R2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %V2)
+  store volatile i32 %R2, ptr %dst, align 4
+
+  %V4 = load <4 x i32>, ptr %src, align 8
+  %R4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %V4)
+  store volatile i32 %R4, ptr %dst, align 4
+
+  %V8 = load <8 x i32>, ptr %src, align 8
+  %R8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %V8)
+  store volatile i32 %R8, ptr %dst, align 4
+
+  %V16 = load <16 x i32>, ptr %src, align 8
+  %R16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %V16)
+  store volatile i32 %R16, ptr %dst, align 4
+
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/77764


More information about the llvm-commits mailing list