[llvm] Add proper cost computation for llvm.vector.reduce.add (PR #77764)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 11 05:18:31 PST 2024
https://github.com/dominik-steenken created https://github.com/llvm/llvm-project/pull/77764
WIP
>From 03a18ee698e00c48b9bc1916fe5eec64469d0f80 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Thu, 11 Jan 2024 14:14:29 +0100
Subject: [PATCH 1/2] Add test to check for "correct" reduce-add costs
---
.../Analysis/CostModel/SystemZ/reduce-add.ll | 32 +++++++++++++++++++
1 file changed, 32 insertions(+)
create mode 100644 llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll
diff --git a/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll b/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll
new file mode 100644
index 00000000000000..b62feb50212d50
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -mtriple=systemz-unknown -mcpu=z13 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+
+define void @reduce(ptr %src, ptr %dst) {
+; CHECK-LABEL: 'reduce'
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %V2)
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %R4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %V4)
+; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %R8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %V8)
+; CHECK: Cost Model: Found an estimated cost of 11 for instruction: %R16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %V16)
+;
+ %V2 = load <2 x i32>, ptr %src, align 8
+ %R2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %V2)
+ store volatile i32 %R2, ptr %dst, align 4
+
+ %V4 = load <4 x i32>, ptr %src, align 8
+ %R4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %V4)
+ store volatile i32 %R4, ptr %dst, align 4
+
+ %V8 = load <8 x i32>, ptr %src, align 8
+ %R8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %V8)
+ store volatile i32 %R8, ptr %dst, align 4
+
+ %V16 = load <16 x i32>, ptr %src, align 8
+ %R16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %V16)
+ store volatile i32 %R16, ptr %dst, align 4
+
+ ret void
+}
+
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
\ No newline at end of file
>From 65811b9c06670a6f544ac1363ebaac33a189b8ab Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Thu, 11 Jan 2024 14:15:37 +0100
Subject: [PATCH 2/2] Update SystemZTTI to correctly return costs for REDUCEADD32
---
.../SystemZ/SystemZTargetTransformInfo.cpp | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 1f97e0f761c04d..d8a0923ab980b2 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -1244,9 +1244,21 @@ InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
return NumVectorMemOps + NumPermutes;
}
-static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
+static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, const SmallVectorImpl<Type *>& ParamTys) {
+ dbgs() << "getVectorIntrinsicInstrCost entry, ID == " << ID << ", name = " << Intrinsic::getName(ID) << ", compare with " << Intrinsic::getName(Intrinsic::vector_reduce_add) << "(" << Intrinsic::vector_reduce_add << ")\n";
if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
return getNumVectorRegs(RetTy); // VPERM
+ else if (ID == Intrinsic::vector_reduce_add) {
+ dbgs() << "getVectorIntrinsicInstrCost is reduce_add intrinsic\n";
+ auto *VTy = cast<FixedVectorType>(ParamTys.front());
+ switch (VTy->getNumElements()) {
+ default: return -1;
+ case 2: return 3;
+ case 4: return 5;
+ case 8: return 7;
+ case 16: return 11;
+ }
+ }
return -1;
}
@@ -1254,7 +1266,7 @@ InstructionCost
SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
InstructionCost Cost =
- getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
+ getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType(), ICA.getArgTypes());
if (Cost != -1)
return Cost;
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
More information about the llvm-commits mailing list