[llvm-branch-commits] [llvm] AMDGPU: Enable vectorization of v2f16 copysign (PR #100799)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jul 29 01:16:37 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100799
From ba0f8f03dc491562050a65456f7ebda23a7e4210 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 25 Jul 2024 22:36:33 +0400
Subject: [PATCH] AMDGPU: Enable vectorization of v2f16 copysign
---
.../AMDGPU/AMDGPUTargetTransformInfo.cpp | 3 +
.../Analysis/CostModel/AMDGPU/copysign.ll | 256 +++++++++---------
.../SLPVectorizer/AMDGPU/slp-v2f16.ll | 16 +-
3 files changed, 139 insertions(+), 136 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index d09f4fb2f659b..9e89898b11bcb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -688,6 +688,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::fma:
case Intrinsic::fmuladd:
+ case Intrinsic::copysign:
// There's a small benefit to using vector ops in the legalized code.
case Intrinsic::round:
case Intrinsic::uadd_sat:
@@ -739,6 +740,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
: getQuarterRateInstrCost(CostKind);
}
break;
+ case Intrinsic::copysign:
+ return NElts * getFullRateInstrCost();
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat:
case Intrinsic::sadd_sat:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
index 3b7b1b4238b8a..06a058ff2e7b1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
@@ -12,90 +12,90 @@
define void @copysign_f16() {
; BASE-LABEL: 'copysign_f16'
; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX8-LABEL: 'copysign_f16'
; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; GFX8-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX9-LABEL: 'copysign_f16'
; GFX9-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX10-LABEL: 'copysign_f16'
; GFX10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; GFX10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; BASE-SIZE-LABEL: 'copysign_f16'
; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; GFX8-SIZE-LABEL: 'copysign_f16'
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; GFX9-SIZE-LABEL: 'copysign_f16'
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; GFX10-SIZE-LABEL: 'copysign_f16'
; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = call half @llvm.copysign.f16(half undef, half undef)
@@ -146,90 +146,90 @@ define void @copysign_f32() {
define void @copysign_bf16() {
; BASE-LABEL: 'copysign_bf16'
; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
-; BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
+; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
; BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX8-LABEL: 'copysign_bf16'
; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
-; GFX8-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
+; GFX8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
; GFX8-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX9-LABEL: 'copysign_bf16'
; GFX9-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
-; GFX9-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
+; GFX9-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX10-LABEL: 'copysign_bf16'
; GFX10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
-; GFX10-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
+; GFX10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
; GFX10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; BASE-SIZE-LABEL: 'copysign_bf16'
; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
-; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
+; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
; BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; GFX8-SIZE-LABEL: 'copysign_bf16'
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
-; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
+; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; GFX9-SIZE-LABEL: 'copysign_bf16'
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; GFX10-SIZE-LABEL: 'copysign_bf16'
; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
-; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = call <5 x bfloat> @llvm.copysign.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9bf16 = call <9 x bfloat> @llvm.copysign.v9bf16(<9 x bfloat> undef, <9 x bfloat> undef)
+; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
; GFX10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%bf16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
@@ -245,25 +245,25 @@ define void @copysign_bf16() {
define void @copysign_f64() {
; ALL-LABEL: 'copysign_f64'
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'copysign_f64'
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f64 = call double @llvm.copysign.f64(double undef, double undef)
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll
index 26be5556bb90d..c51bdedd5be3f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll
@@ -273,10 +273,8 @@ bb:
ret void
}
-; FIXME: Should vectorize
; GCN-LABEL: @copysign_combine_v2f16
-; GCN: call half @llvm.copysign.f16(
-; GCN: call half @llvm.copysign.f16(
+; GCN: call <2 x half> @llvm.copysign.v2f16(
define void @copysign_combine_v2f16(ptr addrspace(1) %arg, half %sign) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -293,12 +291,14 @@ bb:
ret void
}
-; FIXME: Should vectorize
+; FIXME: Should always vectorize
; GCN-LABEL: @copysign_combine_v4f16
-; GCN: call half @llvm.copysign.f16(
-; GCN: call half @llvm.copysign.f16(
-; GCN: call half @llvm.copysign.f16(
-; GCN: call half @llvm.copysign.f16(
+; GCN: call <2 x half> @llvm.copysign.v2f16(
+
+; GFX8: call half @llvm.copysign.f16(
+; GFX8: call half @llvm.copysign.f16(
+
+; GFX9: call <2 x half> @llvm.copysign.v2f16(
define void @copysign_combine_v4f16(ptr addrspace(1) %arg, half %sign) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
More information about the llvm-branch-commits mailing list