[llvm] [AArch64] Add getVectorInstrCost Codesize costs handling. (PR #130946)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 12 04:50:40 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
We have a lot of missing code-size costs for vector operations. This patch starts things off by adding code-size costs for getVectorInstrCost, returning a cost of 1 instead of the VectorInsertExtractBaseCost (which is typically 2). Inserts of a load are given a cost of 0 as they use ld1, and i1 inserts and extracts are given a cost of 2 to account for the extra cset or cmp; otherwise the cost is 1.
---
Patch is 57.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130946.diff
8 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+14-9)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h (+2-2)
- (modified) llvm/test/Analysis/CostModel/AArch64/arith-fp.ll (+6-6)
- (modified) llvm/test/Analysis/CostModel/AArch64/insert-extract.ll (+30-30)
- (modified) llvm/test/Analysis/CostModel/AArch64/min-max.ll (+16-16)
- (modified) llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll (+20-20)
- (modified) llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll (+7-7)
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+28-28)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7cec8a17dfaaa..70f78cbcd6119 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3302,8 +3302,8 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
}
InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
- unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,
- const Instruction *I, Value *Scalar,
+ unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+ bool HasRealUse, const Instruction *I, Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
assert(Val->isVectorTy() && "This must be a vector type");
@@ -3336,12 +3336,16 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
// and its second operand is a load, then we will generate a LD1, which
// are expensive instructions.
if (I && dyn_cast<LoadInst>(I->getOperand(1)))
- return ST->getVectorInsertExtractBaseCost() + 1;
+ return CostKind == TTI::TCK_CodeSize
+ ? 0
+ : ST->getVectorInsertExtractBaseCost() + 1;
// i1 inserts and extract will include an extra cset or cmp of the vector
// value. Increase the cost by 1 to account.
if (Val->getScalarSizeInBits() == 1)
- return ST->getVectorInsertExtractBaseCost() + 1;
+ return CostKind == TTI::TCK_CodeSize
+ ? 2
+ : ST->getVectorInsertExtractBaseCost() + 1;
// FIXME:
// If the extract-element and insert-element instructions could be
@@ -3465,7 +3469,8 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
return 0;
// All other insert/extracts cost this much.
- return ST->getVectorInsertExtractBaseCost();
+ return CostKind == TTI::TCK_CodeSize ? 1
+ : ST->getVectorInsertExtractBaseCost();
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
@@ -3474,22 +3479,22 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
Value *Op1) {
bool HasRealUse =
Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
- return getVectorInstrCostHelper(Opcode, Val, Index, HasRealUse);
+ return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, HasRealUse);
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(
unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
- return getVectorInstrCostHelper(Opcode, Val, Index, false, nullptr, Scalar,
- ScalarUserAndIdx);
+ return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, false, nullptr,
+ Scalar, ScalarUserAndIdx);
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index) {
- return getVectorInstrCostHelper(I.getOpcode(), Val, Index,
+ return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index,
true /* HasRealUse */, &I);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 8a3fd11705640..a5db213cf1f84 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -73,8 +73,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
/// of the extract(nullptr if user is not known before vectorization) and
/// 'Idx' being the extract lane.
InstructionCost getVectorInstrCostHelper(
- unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,
- const Instruction *I = nullptr, Value *Scalar = nullptr,
+ unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+ bool HasRealUse, const Instruction *I = nullptr, Value *Scalar = nullptr,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {});
public:
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index 1efd41426c4ef..de1b39db1539c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -536,9 +536,9 @@ define void @fsqrt() {
define void @fsqrt_fp16() {
; CHECK-BASE-LABEL: 'fsqrt_fp16'
; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %F16 = call half @llvm.sqrt.f16(half undef)
-; CHECK-BASE-NEXT: Cost Model: Found costs of 10 for: %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
-; CHECK-BASE-NEXT: Cost Model: Found costs of 22 for: %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
-; CHECK-BASE-NEXT: Cost Model: Found costs of 44 for: %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-FP16-LABEL: 'fsqrt_fp16'
@@ -679,9 +679,9 @@ define void @fma() {
define void @fma_fp16() {
; CHECK-BASE-LABEL: 'fma_fp16'
; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
-; CHECK-BASE-NEXT: Cost Model: Found costs of 10 for: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-BASE-NEXT: Cost Model: Found costs of 22 for: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-BASE-NEXT: Cost Model: Found costs of 44 for: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-FP16-LABEL: 'fma_fp16'
diff --git a/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
index 3baeb075f1813..cef6cf1a081e6 100644
--- a/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
@@ -11,38 +11,38 @@ target triple = "aarch64--linux-gnu"
define void @vectorInstrCost() {
; CHECK-LABEL: 'vectorInstrCost'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %ta0 = extractelement <8 x i1> undef, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %ta1 = extractelement <8 x i1> undef, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t1 = extractelement <8 x i8> undef, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t2 = extractelement <8 x i8> undef, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t3 = extractelement <4 x i16> undef, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t4 = extractelement <4 x i16> undef, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t5 = extractelement <2 x i32> undef, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t6 = extractelement <2 x i32> undef, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t7 = extractelement <2 x i64> undef, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t8 = extractelement <2 x i64> undef, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:3 for: %ta0 = extractelement <8 x i1> undef, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:3 for: %ta1 = extractelement <8 x i1> undef, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t1 = extractelement <8 x i8> undef, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t2 = extractelement <8 x i8> undef, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t3 = extractelement <4 x i16> undef, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t4 = extractelement <4 x i16> undef, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t5 = extractelement <2 x i32> undef, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t6 = extractelement <2 x i32> undef, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t7 = extractelement <2 x i64> undef, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t8 = extractelement <2 x i64> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t9 = extractelement <4 x half> undef, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t10 = extractelement <4 x half> undef, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t10 = extractelement <4 x half> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t11 = extractelement <2 x float> undef, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t12 = extractelement <2 x float> undef, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t12 = extractelement <2 x float> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t13 = extractelement <2 x double> undef, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t14 = extractelement <2 x double> undef, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %t31 = insertelement <8 x i1> undef, i1 false, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %t41 = insertelement <8 x i1> undef, i1 true, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t14 = extractelement <2 x double> undef, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:3 for: %t31 = insertelement <8 x i1> undef, i1 false, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:3 for: %t41 = insertelement <8 x i1> undef, i1 true, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t110 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%ta0 = extractelement <8 x i1> undef, i32 0
@@ -86,7 +86,7 @@ define void @vectorInstrCost() {
define <8 x i8> @LD1_B(<8 x i8> %vec, ptr noundef %i) {
; CHECK-LABEL: 'LD1_B'
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i8, ptr %i, align 1
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %v2
;
entry:
@@ -98,7 +98,7 @@ entry:
define <4 x i16> @LD1_H(<4 x i16> %vec, ptr noundef %i) {
; CHECK-LABEL: 'LD1_H'
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i16, ptr %i, align 2
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %v2
;
entry:
@@ -110,7 +110,7 @@ entry:
define <4 x i32> @LD1_W(<4 x i32> %vec, ptr noundef %i) {
; CHECK-LABEL: 'LD1_W'
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i32, ptr %i, align 4
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %v2
;
entry:
@@ -122,7 +122,7 @@ entry:
define <2 x i64> @LD1_X(<2 x i64> %vec, ptr noundef %i) {
; CHECK-LABEL: 'LD1_X'
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i64, ptr %i, align 8
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %v2
;
entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
index ed4b9dd2ba571..b824f5309adc1 100644
--- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
@@ -195,10 +195,10 @@ define void @smax() {
define void @minnum16() {
; CHECK-NOF16-LABEL: 'minnum16'
; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %f16 = call half @llvm.minnum.f16(half undef, half undef)
-; CHECK-NOF16-NEXT: Cost Model: Found costs of 4 for: %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found costs of 10 for: %V4f16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found costs of 22 for: %V8f16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found costs of 44 for: %V16f16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V4f16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %V8f16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %V16f16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-F16-LABEL: 'minnum16'
@@ -220,10 +220,10 @@ define void @minnum16() {
define void @maxnum16() {
; CHECK-NOF16-LABEL: 'maxnum16'
; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %f16 = call half @llvm.maxnum.f16(half undef, half undef)
-; CHECK-NOF16-NEXT: Cost Model: Found costs of 4 for: %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found costs of 10 for: %V4f16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found costs of 22 for: %V8f16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found costs of 44 for: %V16f16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V4f16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %V8f16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %V16f16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-F16-LABEL: 'maxnum16'
@@ -288,10 +288,10 @@ define void @maxnum() {
define void @minimum16() {
; CHECK-NOF16-LABEL: 'minimum16'
; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %f16 = call half @llvm.minimum.f16(half undef, ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/130946
More information about the llvm-commits mailing list