[llvm] 3800a83 - AMDGPU: Reduce cost of f64 copysign (#141944)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 16:10:55 PDT 2025
Author: Matt Arsenault
Date: 2025-06-18T08:10:53+09:00
New Revision: 3800a83160a42f32947b82700e454cc07c600734
URL: https://github.com/llvm/llvm-project/commit/3800a83160a42f32947b82700e454cc07c600734
DIFF: https://github.com/llvm/llvm-project/commit/3800a83160a42f32947b82700e454cc07c600734.diff
LOG: AMDGPU: Reduce cost of f64 copysign (#141944)
The real implementation is 1 real instruction plus a constant
materialize. Call that a 1, it's not a real f64 operation.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b2b25ac66677e..b79c9be3eac93 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -718,9 +718,6 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
- if (SLT == MVT::f64)
- return LT.first * NElts * get64BitInstrCost(CostKind);
-
if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
(ST->hasPackedFP32Ops() && SLT == MVT::f32))
NElts = (NElts + 1) / 2;
@@ -731,6 +728,11 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
switch (ICA.getID()) {
case Intrinsic::fma:
case Intrinsic::fmuladd:
+ if (SLT == MVT::f64) {
+ InstRate = get64BitInstrCost(CostKind);
+ break;
+ }
+
if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
InstRate = getFullRateInstrCost();
else {
@@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::copysign:
return NElts * getFullRateInstrCost();
case Intrinsic::canonicalize: {
- assert(SLT != MVT::f64);
- InstRate = getFullRateInstrCost();
+ InstRate =
+ SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
break;
}
case Intrinsic::uadd_sat:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
index 334bb341a3c3e..5b042a8a04603 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
@@ -245,25 +245,25 @@ define void @copysign_bf16() {
define void @copysign_f64() {
; ALL-LABEL: 'copysign_f64'
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'copysign_f64'
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f64 = call double @llvm.copysign.f64(double undef, double undef)
More information about the llvm-commits
mailing list