[llvm] ab05ab5 - [CostModel][AMDGPU] Fix instructions costs estimation for vector types.
Daniil Fukalov via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 2 16:08:24 PST 2021
Author: Daniil Fukalov
Date: 2021-12-03T03:08:08+03:00
New Revision: ab05ab59a7bd22889fe19389431676b3414c6b7d
URL: https://github.com/llvm/llvm-project/commit/ab05ab59a7bd22889fe19389431676b3414c6b7d
DIFF: https://github.com/llvm/llvm-project/commit/ab05ab59a7bd22889fe19389431676b3414c6b7d.diff
LOG: [CostModel][AMDGPU] Fix instructions costs estimation for vector types.
1. Fixed vector instructions costs estimations incosistency - removed different
logic for "not simple types" since it biases costs for these types.
2. Fixed legalization penalty for vectors too big for the target: changed from
overwrite default legalization cost value estimation to added penalty.
3. Fixed few typos in tests.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D114893
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
llvm/test/Analysis/CostModel/AMDGPU/mul.ll
llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index ecdbdf613a534..4a31a598fc789 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -519,57 +519,6 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
- EVT OrigTy = TLI->getValueType(DL, Ty);
- if (!OrigTy.isSimple()) {
- // FIXME: We're having to query the throughput cost so that the basic
- // implementation tries to generate legalize and scalarization costs. Maybe
- // we could hoist the scalarization code here?
- if (CostKind != TTI::TCK_CodeSize)
- return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput,
- Opd1Info, Opd2Info, Opd1PropInfo,
- Opd2PropInfo, Args, CxtI);
- // Scalarization
-
- // Check if any of the operands are vector operands.
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
-
- bool IsFloat = Ty->isFPOrFPVectorTy();
- // Assume that floating point arithmetic operations cost twice as much as
- // integer operations.
- unsigned OpCost = (IsFloat ? 2 : 1);
-
- if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1.
- // TODO: Once we have extract/insert subvector cost we need to use them.
- return LT.first * OpCost;
- }
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // If the operation is custom lowered, then assume that the code is twice
- // as expensive.
- return LT.first * 2 * OpCost;
- }
-
- // Else, assume that we need to scalarize this op.
- // TODO: If one of the types get legalized by splitting, handle this
- // similarly to what getCastInstrCost() does.
- if (auto *VTy = dyn_cast<VectorType>(Ty)) {
- unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
- InstructionCost Cost = getArithmeticInstrCost(
- Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo, Args, CxtI);
- // Return the cost of multiple scalar invocation plus the cost of
- // inserting and extracting the values.
- SmallVector<Type *> Tys(Args.size(), Ty);
- return getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
- }
-
- // We don't know anything about this scalar instruction.
- return OpCost;
- }
// Legalize the type.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 35b72f5d201ba..76d9cdb6469ca 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12477,6 +12477,6 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
if (Size <= 256)
return Cost;
- Cost.first = (Size + 255) / 256;
+ Cost.first += (Size + 255) / 256;
return Cost;
}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
index 7b70eb20132cf..2bd77e252ed01 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @add_i32() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
-; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'add_i32'
@@ -27,7 +27,7 @@ define amdgpu_kernel void @add_i32() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i32 = add i32 undef, undef
@@ -38,7 +38,7 @@ define amdgpu_kernel void @add_i32() #0 {
%v6i32 = add <6 x i32> undef, undef
%v7i32 = add <7 x i32> undef, undef
%v8i32 = add <8 x i32> undef, undef
- %v32i32 = add <32 x i32> undef, undef
+ %v9i32 = add <9 x i32> undef, undef
ret void
}
@@ -48,11 +48,7 @@ define amdgpu_kernel void @add_i64() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
-; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
-; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
-; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
-; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
-; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'add_i64'
@@ -60,11 +56,7 @@ define amdgpu_kernel void @add_i64() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i64 = add i64 undef, undef
@@ -72,10 +64,6 @@ define amdgpu_kernel void @add_i64() #0 {
%v3i64 = add <3 x i64> undef, undef
%v4i64 = add <4 x i64> undef, undef
%v5i64 = add <5 x i64> undef, undef
- %v6i64 = add <6 x i64> undef, undef
- %v7i64 = add <7 x i64> undef, undef
- %v8i64 = add <8 x i64> undef, undef
- %v16i64 = add <16 x i64> undef, undef
ret void
}
@@ -87,6 +75,8 @@ define amdgpu_kernel void @add_i16() #0 {
; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
+; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
+; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW16-LABEL: 'add_i16'
@@ -96,6 +86,8 @@ define amdgpu_kernel void @add_i16() #0 {
; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FAST16-SIZE-LABEL: 'add_i16'
@@ -105,6 +97,8 @@ define amdgpu_kernel void @add_i16() #0 {
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW16-SIZE-LABEL: 'add_i16'
@@ -114,6 +108,8 @@ define amdgpu_kernel void @add_i16() #0 {
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i16 = add i16 undef, undef
@@ -122,6 +118,8 @@ define amdgpu_kernel void @add_i16() #0 {
%v4i16 = add <4 x i16> undef, undef
%v5i16 = add <5 x i16> undef, undef
%v6i16 = add <6 x i16> undef, undef
+ %v16i16 = add <16 x i16> undef, undef
+ %v17i16 = add <17 x i16> undef, undef
ret void
}
@@ -133,6 +131,8 @@ define amdgpu_kernel void @add_i8() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'add_i8'
@@ -142,6 +142,8 @@ define amdgpu_kernel void @add_i8() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i8 = add i8 undef, undef
@@ -150,12 +152,14 @@ define amdgpu_kernel void @add_i8() #0 {
%v4i8 = add <4 x i8> undef, undef
%v5i8 = add <5 x i8> undef, undef
%v6i8 = add <6 x i8> undef, undef
+ %v32i8 = add <32 x i8> undef, undef
+ %v33i8 = add <33 x i8> undef, undef
ret void
}
define amdgpu_kernel void @sub() #0 {
; FAST16-LABEL: 'sub'
-; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -165,7 +169,7 @@ define amdgpu_kernel void @sub() #0 {
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW16-LABEL: 'sub'
-; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -175,7 +179,7 @@ define amdgpu_kernel void @sub() #0 {
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FAST16-SIZE-LABEL: 'sub'
-; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -185,7 +189,7 @@ define amdgpu_kernel void @sub() #0 {
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW16-SIZE-LABEL: 'sub'
-; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -194,7 +198,7 @@ define amdgpu_kernel void @sub() #0 {
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %i8 = sub i16 undef, undef
+ %i8 = sub i8 undef, undef
%i16 = sub i16 undef, undef
%i32 = sub i32 undef, undef
%i64 = sub i64 undef, undef
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
index 4a01b1bd677c1..50164bc708bf1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
@@ -10,6 +10,8 @@ define amdgpu_kernel void @fabs_f32() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #2
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'fabs_f32'
@@ -18,6 +20,8 @@ define amdgpu_kernel void @fabs_f32() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #2
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = call float @llvm.fabs.f32(float undef) #1
@@ -25,6 +29,8 @@ define amdgpu_kernel void @fabs_f32() #0 {
%v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #1
%v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #1
%v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #1
+ %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #1
+ %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #1
ret void
}
@@ -34,6 +40,7 @@ define amdgpu_kernel void @fabs_f64() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'fabs_f64'
@@ -41,12 +48,14 @@ define amdgpu_kernel void @fabs_f64() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f64 = call double @llvm.fabs.f64(double undef) #1
%v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #1
%v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #1
%v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #1
+ %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #1
ret void
}
@@ -57,6 +66,8 @@ define amdgpu_kernel void @fabs_f16() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #2
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #2
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'fabs_f16'
@@ -65,6 +76,8 @@ define amdgpu_kernel void @fabs_f16() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #2
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #2
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = call half @llvm.fabs.f16(half undef) #1
@@ -72,6 +85,8 @@ define amdgpu_kernel void @fabs_f16() #0 {
%v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #1
%v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #1
%v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #1
+ %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #1
+ %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #1
ret void
}
@@ -80,17 +95,22 @@ declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #1
declare <3 x float> @llvm.fabs.v3f32(<3 x float>) #1
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1
declare <5 x float> @llvm.fabs.v5f32(<5 x float>) #1
+declare <8 x float> @llvm.fabs.v8f32(<8 x float>) #1
+declare <9 x float> @llvm.fabs.v9f32(<9 x float>) #1
declare double @llvm.fabs.f64(double) #1
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #1
declare <3 x double> @llvm.fabs.v3f64(<3 x double>) #1
declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #1
+declare <5 x double> @llvm.fabs.v5f64(<5 x double>) #1
declare half @llvm.fabs.f16(half) #1
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
declare <3 x half> @llvm.fabs.v3f16(<3 x half>) #1
declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1
declare <5 x half> @llvm.fabs.v5f16(<5 x half>) #1
+declare <16 x half> @llvm.fabs.v16f16(<16 x half>) #1
+declare <17 x half> @llvm.fabs.v17f16(<17 x half>) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
index 2decec00badd3..12760920733d1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
@@ -14,6 +14,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; NOPACKEDF32-LABEL: 'fadd_f32'
@@ -22,6 +24,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef
+; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f32'
@@ -30,6 +34,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; NOPACKEDF32-SIZE-LABEL: 'fadd_f32'
@@ -38,6 +44,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fadd float undef, undef
@@ -45,6 +53,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
%v3f32 = fadd <3 x float> undef, undef
%v4f32 = fadd <4 x float> undef, undef
%v5f32 = fadd <5 x float> undef, undef
+ %v8f32 = fadd <8 x float> undef, undef
+ %v9f32 = fadd <9 x float> undef, undef
ret void
}
@@ -54,7 +64,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fadd <5 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FASTF64-LABEL: 'fadd_f64'
@@ -62,7 +72,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fadd <5 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOWF64-LABEL: 'fadd_f64'
@@ -70,7 +80,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fadd <2 x double> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fadd <3 x double> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; SLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; SLOWF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fadd <5 x double> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f64'
@@ -78,7 +88,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fadd <5 x double> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; NOPACKEDF32-SIZE-LABEL: 'fadd_f64'
@@ -86,7 +96,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fadd <5 x double> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f64 = fadd double undef, undef
@@ -103,7 +113,9 @@ define amdgpu_kernel void @fadd_f16() #0 {
; FASTF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef
; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef
; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
-; FASTF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; FASTF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; FASTF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef
+; FASTF16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef
; FASTF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOWF64-LABEL: 'fadd_f16'
@@ -111,7 +123,9 @@ define amdgpu_kernel void @fadd_f16() #0 {
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef
-; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fadd <16 x half> undef, undef
+; SLOWF64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fadd <17 x half> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FASTF16-SIZE-LABEL: 'fadd_f16'
@@ -119,7 +133,9 @@ define amdgpu_kernel void @fadd_f16() #0 {
; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef
; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef
; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
-; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef
+; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef
; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOWF64-SIZE-LABEL: 'fadd_f16'
@@ -127,7 +143,9 @@ define amdgpu_kernel void @fadd_f16() #0 {
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef
-; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fadd <16 x half> undef, undef
+; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fadd <17 x half> undef, undef
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = fadd half undef, undef
@@ -135,6 +153,8 @@ define amdgpu_kernel void @fadd_f16() #0 {
%v3f16 = fadd <3 x half> undef, undef
%v4f16 = fadd <4 x half> undef, undef
%v5f16 = fadd <5 x half> undef, undef
+ %v16f16 = fadd <16 x half> undef, undef
+ %v17f16 = fadd <17 x half> undef, undef
ret void
}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index ff300dd209cd9..310598da87d7e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -19,6 +19,8 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'fdiv_f32_ieee'
@@ -27,6 +29,8 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f32 = fdiv <8 x float> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fdiv float undef, undef
@@ -34,6 +38,8 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
%v3f32 = fdiv <3 x float> undef, undef
%v4f32 = fdiv <4 x float> undef, undef
%v5f32 = fdiv <5 x float> undef, undef
+ %v8f32 = fdiv <8 x float> undef, undef
+ %v9f32 = fdiv <9 x float> undef, undef
ret void
}
@@ -44,6 +50,8 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f32 = fdiv <3 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = fdiv <4 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8f32 = fdiv <8 x float> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'fdiv_f32_ftzdaz'
@@ -52,6 +60,8 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fdiv float undef, undef
@@ -59,6 +69,8 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
%v3f32 = fdiv <3 x float> undef, undef
%v4f32 = fdiv <4 x float> undef, undef
%v5f32 = fdiv <5 x float> undef, undef
+ %v8f32 = fdiv <8 x float> undef, undef
+ %v9f32 = fdiv <9 x float> undef, undef
ret void
}
@@ -68,7 +80,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> undef, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> undef, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> undef, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; CISLOWF64-LABEL: 'fdiv_f64'
@@ -76,7 +88,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> undef, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SIFASTF64-LABEL: 'fdiv_f64'
@@ -84,7 +96,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> undef, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> undef, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> undef, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SISLOWF64-LABEL: 'fdiv_f64'
@@ -92,7 +104,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> undef, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> undef, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> undef, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FP16-LABEL: 'fdiv_f64'
@@ -100,7 +112,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; CI-SIZE-LABEL: 'fdiv_f64'
@@ -108,7 +120,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> undef, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SI-SIZE-LABEL: 'fdiv_f64'
@@ -116,7 +128,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> undef, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> undef, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> undef, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; FP16-SIZE-LABEL: 'fdiv_f64'
@@ -124,7 +136,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f64 = fdiv double undef, undef
@@ -141,7 +153,9 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
; NOFP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef
; NOFP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef
; NOFP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; NOFP16-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %v17f16 = fdiv <17 x half> undef, undef
; NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FP16-LABEL: 'fdiv_f16_f32ieee'
@@ -149,7 +163,9 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; FP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
@@ -157,7 +173,9 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %v17f16 = fdiv <17 x half> undef, undef
; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; FP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
@@ -165,7 +183,9 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = fdiv half undef, undef
@@ -173,6 +193,8 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
%v3f16 = fdiv <3 x half> undef, undef
%v4f16 = fdiv <4 x half> undef, undef
%v5f16 = fdiv <5 x half> undef, undef
+ %v16f16 = fdiv <16 x half> undef, undef
+ %v17f16 = fdiv <17 x half> undef, undef
ret void
}
@@ -182,7 +204,9 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2f16 = fdiv <2 x half> undef, undef
; NOFP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v3f16 = fdiv <3 x half> undef, undef
; NOFP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; NOFP16-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %v17f16 = fdiv <17 x half> undef, undef
; NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FP16-LABEL: 'fdiv_f16_f32ftzdaz'
@@ -190,7 +214,9 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; FP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
@@ -198,7 +224,9 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef
; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef
; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %v17f16 = fdiv <17 x half> undef, undef
; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; FP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
@@ -206,7 +234,9 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = fdiv half undef, undef
@@ -214,6 +244,8 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
%v3f16 = fdiv <3 x half> undef, undef
%v4f16 = fdiv <4 x half> undef, undef
%v5f16 = fdiv <5 x half> undef, undef
+ %v16f16 = fdiv <16 x half> undef, undef
+ %v17f16 = fdiv <17 x half> undef, undef
ret void
}
@@ -223,7 +255,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -233,7 +265,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; CISLOWF64-LABEL: 'rcp_ieee'
@@ -241,7 +273,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -251,7 +283,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SIFASTF64-LABEL: 'rcp_ieee'
@@ -259,7 +291,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -269,7 +301,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SISLOWF64-LABEL: 'rcp_ieee'
@@ -277,7 +309,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -287,7 +319,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FP16-LABEL: 'rcp_ieee'
@@ -295,7 +327,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -305,7 +337,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; CI-SIZE-LABEL: 'rcp_ieee'
@@ -313,7 +345,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -323,7 +355,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SI-SIZE-LABEL: 'rcp_ieee'
@@ -331,7 +363,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -341,7 +373,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; FP16-SIZE-LABEL: 'rcp_ieee'
@@ -349,7 +381,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -359,7 +391,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = fdiv half 1.0, undef
@@ -396,7 +428,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; CISLOWF64-LABEL: 'rcp_ftzdaz'
@@ -414,7 +446,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SIFASTF64-LABEL: 'rcp_ftzdaz'
@@ -432,7 +464,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SISLOWF64-LABEL: 'rcp_ftzdaz'
@@ -450,7 +482,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FP16-LABEL: 'rcp_ftzdaz'
@@ -458,7 +490,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -468,7 +500,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; CI-SIZE-LABEL: 'rcp_ftzdaz'
@@ -476,7 +508,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -486,7 +518,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SI-SIZE-LABEL: 'rcp_ftzdaz'
@@ -494,7 +526,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -504,7 +536,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; FP16-SIZE-LABEL: 'rcp_ftzdaz'
@@ -512,7 +544,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -522,7 +554,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = fdiv half 1.0, undef
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
index 5eb4aa78b8c9e..a5906b070d58e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
@@ -14,6 +14,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; F32-LABEL: 'fmul_f32'
@@ -22,6 +24,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
; F32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; F32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
+; F32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-SIZE-LABEL: 'fmul_f32'
@@ -30,6 +34,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
+; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmul_f32'
@@ -38,6 +44,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
+; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fmul float undef, undef
@@ -45,6 +53,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
%v3f32 = fmul <3 x float> undef, undef
%v4f32 = fmul <4 x float> undef, undef
%v5f32 = fmul <5 x float> undef, undef
+ %v8f32 = fmul <8 x float> undef, undef
+ %v9f32 = fmul <9 x float> undef, undef
ret void
}
@@ -54,7 +64,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fmul <5 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FASTF64-LABEL: 'fmul_f64'
@@ -62,7 +72,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fmul <5 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW-LABEL: 'fmul_f64'
@@ -70,7 +80,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fmul <2 x double> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fmul <3 x double> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fmul <5 x double> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-SIZE-LABEL: 'fmul_f64'
@@ -78,7 +88,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fmul <5 x double> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmul_f64'
@@ -86,7 +96,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fmul <5 x double> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f64 = fmul double undef, undef
@@ -103,7 +113,9 @@ define amdgpu_kernel void @fmul_f16() #0 {
; GFX9-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
-; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
+; GFX9-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW-LABEL: 'fmul_f16'
@@ -111,7 +123,9 @@ define amdgpu_kernel void @fmul_f16() #0 {
; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
-; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fmul <16 x half> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fmul <17 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX9-SIZE-LABEL: 'fmul_f16'
@@ -119,7 +133,9 @@ define amdgpu_kernel void @fmul_f16() #0 {
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
-; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
+; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW-SIZE-LABEL: 'fmul_f16'
@@ -127,7 +143,9 @@ define amdgpu_kernel void @fmul_f16() #0 {
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
-; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fmul <16 x half> undef, undef
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fmul <17 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = fmul half undef, undef
@@ -135,6 +153,8 @@ define amdgpu_kernel void @fmul_f16() #0 {
%v3f16 = fmul <3 x half> undef, undef
%v4f16 = fmul <4 x half> undef, undef
%v5f16 = fmul <5 x half> undef, undef
+ %v16f16 = fmul <16 x half> undef, undef
+ %v17f16 = fmul <17 x half> undef, undef
ret void
}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
index b4482906cf400..bf33cdc63553b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
@@ -10,6 +10,8 @@ define amdgpu_kernel void @fneg_f32() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = fneg <8 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = fneg <9 x float> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SIZE-LABEL: 'fneg_f32'
@@ -18,6 +20,8 @@ define amdgpu_kernel void @fneg_f32() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef
+; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = fneg <8 x float> undef
+; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = fneg <9 x float> undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fneg float undef
@@ -25,6 +29,8 @@ define amdgpu_kernel void @fneg_f32() {
%v3f32 = fneg <3 x float> undef
%v4f32 = fneg <4 x float> undef
%v5f32 = fneg <5 x float> undef
+ %v8f32 = fneg <8 x float> undef
+ %v9f32 = fneg <9 x float> undef
ret void
}
@@ -59,7 +65,9 @@ define amdgpu_kernel void @fneg_f16() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = fneg <5 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = fneg <16 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = fneg <17 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SIZE-LABEL: 'fneg_f16'
@@ -67,7 +75,9 @@ define amdgpu_kernel void @fneg_f16() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef
+; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = fneg <5 x half> undef
+; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = fneg <16 x half> undef
+; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = fneg <17 x half> undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = fneg half undef
@@ -75,5 +85,7 @@ define amdgpu_kernel void @fneg_f16() {
%v3f16 = fneg <3 x half> undef
%v4f16 = fneg <4 x half> undef
%v5f16 = fneg <5 x half> undef
+ %v16f16 = fneg <16 x half> undef
+ %v17f16 = fneg <17 x half> undef
ret void
}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
index a10ef8e16b022..38fec3cfe5349 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
@@ -14,6 +14,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; NOPACKEDF32-LABEL: 'fsub_f32'
@@ -22,6 +24,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
+; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f32'
@@ -30,6 +34,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; NOPACKEDF32-SIZE-LABEL: 'fsub_f32'
@@ -38,6 +44,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f32 = fsub float undef, undef
@@ -45,6 +53,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
%v3f32 = fsub <3 x float> undef, undef
%v4f32 = fsub <4 x float> undef, undef
%v5f32 = fsub <5 x float> undef, undef
+ %v8f32 = fsub <8 x float> undef, undef
+ %v9f32 = fsub <9 x float> undef, undef
ret void
}
@@ -54,7 +64,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FASTF64-LABEL: 'fsub_f64'
@@ -62,7 +72,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOWF64-LABEL: 'fsub_f64'
@@ -70,7 +80,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fsub <2 x double> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fsub <3 x double> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; SLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; SLOWF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fsub <5 x double> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f64'
@@ -78,7 +88,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef
; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; NOPACKEDF32-SIZE-LABEL: 'fsub_f64'
@@ -86,7 +96,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef
; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f64 = fsub double undef, undef
@@ -103,7 +113,9 @@ define amdgpu_kernel void @fsub_f16() #0 {
; FASTF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
-; FASTF16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; FASTF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; FASTF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
+; FASTF16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
; FASTF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOWF64-LABEL: 'fsub_f16'
@@ -111,7 +123,9 @@ define amdgpu_kernel void @fsub_f16() #0 {
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
-; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef
+; SLOWF64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef
; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FASTF16-SIZE-LABEL: 'fsub_f16'
@@ -119,7 +133,9 @@ define amdgpu_kernel void @fsub_f16() #0 {
; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
-; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
+; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOWF64-SIZE-LABEL: 'fsub_f16'
@@ -127,7 +143,9 @@ define amdgpu_kernel void @fsub_f16() #0 {
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
-; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef
+; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef
; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%f16 = fsub half undef, undef
@@ -135,5 +153,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
%v3f16 = fsub <3 x half> undef, undef
%v4f16 = fsub <4 x half> undef, undef
%v5f16 = fsub <5 x half> undef, undef
+ %v16f16 = fsub <16 x half> undef, undef
+ %v17f16 = fsub <17 x half> undef, undef
ret void
}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
index 3c5e3da71079e..824b7e1ee93d6 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -12,6 +12,8 @@ define amdgpu_kernel void @mul_i32() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i32 = mul <3 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i32 = mul <4 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i32 = mul <5 x i32> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i32 = mul <8 x i32> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v9i32 = mul <9 x i32> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'mul_i32'
@@ -20,6 +22,8 @@ define amdgpu_kernel void @mul_i32() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i32 = mul <3 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = mul <4 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i32 = mul <5 x i32> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = mul <8 x i32> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9i32 = mul <9 x i32> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i32 = mul i32 undef, undef
@@ -27,6 +31,8 @@ define amdgpu_kernel void @mul_i32() #0 {
%v3i32 = mul <3 x i32> undef, undef
%v4i32 = mul <4 x i32> undef, undef
%v5i32 = mul <5 x i32> undef, undef
+ %v8i32 = mul <8 x i32> undef, undef
+ %v9i32 = mul <9 x i32> undef, undef
ret void
}
@@ -36,8 +42,7 @@ define amdgpu_kernel void @mul_i64() #0 {
; ALL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2i64 = mul <2 x i64> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v3i64 = mul <3 x i64> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = mul <4 x i64> undef, undef
-; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5i64 = mul <4 x i64> undef, undef
-; ALL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v8i64 = mul <8 x i64> undef, undef
+; ALL-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %v5i64 = mul <5 x i64> undef, undef
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'mul_i64'
@@ -45,16 +50,14 @@ define amdgpu_kernel void @mul_i64() #0 {
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i64 = mul <2 x i64> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3i64 = mul <3 x i64> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4i64 = mul <4 x i64> undef, undef
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = mul <4 x i64> undef, undef
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i64 = mul <8 x i64> undef, undef
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %v5i64 = mul <5 x i64> undef, undef
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i64 = mul i64 undef, undef
%v2i64 = mul <2 x i64> undef, undef
%v3i64 = mul <3 x i64> undef, undef
%v4i64 = mul <4 x i64> undef, undef
- %v5i64 = mul <4 x i64> undef, undef
- %v8i64 = mul <8 x i64> undef, undef
+ %v5i64 = mul <5 x i64> undef, undef
ret void
}
@@ -64,7 +67,9 @@ define amdgpu_kernel void @mul_i16() #0 {
; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16 = mul <2 x i16> undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3i16 = mul <3 x i16> undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i16 = mul <4 x i16> undef, undef
-; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16i16 = mul <16 x i16> undef, undef
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v17i16 = mul <17 x i16> undef, undef
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FAST16-LABEL: 'mul_i16'
@@ -72,7 +77,9 @@ define amdgpu_kernel void @mul_i16() #0 {
; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef
-; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = mul <16 x i16> undef, undef
+; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v17i16 = mul <17 x i16> undef, undef
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW16-SIZE-LABEL: 'mul_i16'
@@ -80,7 +87,9 @@ define amdgpu_kernel void @mul_i16() #0 {
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef
-; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = mul <16 x i16> undef, undef
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v17i16 = mul <17 x i16> undef, undef
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; FAST16-SIZE-LABEL: 'mul_i16'
@@ -88,7 +97,9 @@ define amdgpu_kernel void @mul_i16() #0 {
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = mul <2 x i16> undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = mul <3 x i16> undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = mul <4 x i16> undef, undef
-; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = mul <16 x i16> undef, undef
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v17i16 = mul <17 x i16> undef, undef
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i16 = mul i16 undef, undef
@@ -96,6 +107,8 @@ define amdgpu_kernel void @mul_i16() #0 {
%v3i16 = mul <3 x i16> undef, undef
%v4i16 = mul <4 x i16> undef, undef
%v5i16 = mul <5 x i16> undef, undef
+ %v16i16 = mul <16 x i16> undef, undef
+ %v17i16 = mul <17 x i16> undef, undef
ret void
}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
index 3ab46ad8e14b3..c2d3ad4692207 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
@@ -26,7 +26,7 @@ define amdgpu_kernel void @shl() #0 {
; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
-; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
+; FAST64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW64-LABEL: 'shl'
@@ -49,7 +49,7 @@ define amdgpu_kernel void @shl() #0 {
; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = shl <2 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = shl <3 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = shl <4 x i64> undef, undef
-; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = shl <5 x i64> undef, undef
+; SLOW64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = shl <5 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FAST64-SIZE-LABEL: 'shl'
@@ -72,7 +72,7 @@ define amdgpu_kernel void @shl() #0 {
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
-; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
+; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW64-SIZE-LABEL: 'shl'
@@ -95,7 +95,7 @@ define amdgpu_kernel void @shl() #0 {
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
-; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
+; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i8 = shl i8 undef, undef
@@ -142,7 +142,7 @@ define amdgpu_kernel void @lshr() #0 {
; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
-; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
+; FAST64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW64-LABEL: 'lshr'
@@ -165,7 +165,7 @@ define amdgpu_kernel void @lshr() #0 {
; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = lshr <2 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = lshr <3 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = lshr <4 x i64> undef, undef
-; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = lshr <5 x i64> undef, undef
+; SLOW64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = lshr <5 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FAST64-SIZE-LABEL: 'lshr'
@@ -188,7 +188,7 @@ define amdgpu_kernel void @lshr() #0 {
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
-; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
+; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW64-SIZE-LABEL: 'lshr'
@@ -211,7 +211,7 @@ define amdgpu_kernel void @lshr() #0 {
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
-; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
+; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i8 = lshr i8 undef, undef
@@ -258,7 +258,7 @@ define amdgpu_kernel void @ashr() #0 {
; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
-; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
+; FAST64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef
; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW64-LABEL: 'ashr'
@@ -281,7 +281,7 @@ define amdgpu_kernel void @ashr() #0 {
; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = ashr <2 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = ashr <3 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = ashr <4 x i64> undef, undef
-; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = ashr <5 x i64> undef, undef
+; SLOW64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = ashr <5 x i64> undef, undef
; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FAST64-SIZE-LABEL: 'ashr'
@@ -304,7 +304,7 @@ define amdgpu_kernel void @ashr() #0 {
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
-; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
+; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef
; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW64-SIZE-LABEL: 'ashr'
@@ -327,7 +327,7 @@ define amdgpu_kernel void @ashr() #0 {
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
-; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
+; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef
; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%i8 = ashr i8 undef, undef
More information about the llvm-commits
mailing list