[llvm] [CostModel] getTypeBasedIntrinsicInstrCost - add default cost approximations for funnel shifts (PR #124175)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 10:50:36 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/124175
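Funnel shift (fshl/fshr) costs were previously only handled when argument data was available, so type-based-only cost queries had no dedicated estimate for them.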
From 44497c6c15a53d606a0a3dafd14e2f865b1d2948 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 23 Jan 2025 18:48:53 +0000
Subject: [PATCH] [CostModel] getTypeBasedIntrinsicInstrCost - add default cost
approximations for funnel shifts
We only had handling for cases where we had argument data.
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 29 +++++++++++++++++++
.../CostModel/AArch64/sve-intrinsics.ll | 16 +++++-----
.../Analysis/CostModel/RISCV/vp-intrinsics.ll | 4 +--
3 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 46bb84b4fec4f1..596db392392131 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2258,6 +2258,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::abs:
ISD = ISD::ABS;
break;
+ case Intrinsic::fshl:
+ ISD = ISD::FSHL;
+ break;
+ case Intrinsic::fshr:
+ ISD = ISD::FSHR;
+ break;
case Intrinsic::smax:
ISD = ISD::SMAX;
break;
@@ -2547,6 +2553,29 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
{TTI::OK_UniformConstantValue, TTI::OP_None});
return Cost;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ InstructionCost Cost = 0;
+ Cost +=
+ thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
+ Cost +=
+ thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
+ Cost +=
+ thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
+ CostKind);
+ Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
+ CostKind);
+ // Shift-by-zero handling.
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ CmpInst::ICMP_EQ, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::ICMP_EQ, CostKind);
+ return Cost;
+ }
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat: {
if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 3e5de313c3cacc..696dec91d93d20 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1025,10 +1025,10 @@ define void @fshr() #0 {
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'fshr'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
@@ -1054,10 +1054,10 @@ define void @fshl() #0 {
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'fshl'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index 5126a6a0a3cbcd..0245a0f7ee6cbc 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -38,7 +38,7 @@ define void @fshr(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i3
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPEBASED-LABEL: 'fshr'
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %1 = call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <vscale x 1 x i32> @llvm.fshr.nxv4i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
@@ -51,7 +51,7 @@ define void @fshl(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i3
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPEBASED-LABEL: 'fshl'
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %1 = call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <vscale x 1 x i32> @llvm.fshl.nxv4i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
More information about the llvm-commits mailing list