[llvm] 6907ab4 - [AArch64] Extend costs for fptoi.sat intrinsics.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 28 02:47:47 PDT 2024
Author: David Green
Date: 2024-07-28T10:47:40+01:00
New Revision: 6907ab49399f131e04ea1816d155107e92d3b1aa
URL: https://github.com/llvm/llvm-project/commit/6907ab49399f131e04ea1816d155107e92d3b1aa
DIFF: https://github.com/llvm/llvm-project/commit/6907ab49399f131e04ea1816d155107e92d3b1aa.diff
LOG: [AArch64] Extend costs for fptoi.sat intrinsics.
Most of these bring the costs in line with the code generation. Without
FullFP16, f16 inputs are usually converted to f32 first, and the costs reflect
that. Extending vector conversions such as v2f32->v2i64 similarly use
fcvtl + fcvt. As a fallback we use costs similar to the target-independent
code, which should give a relatively high cost.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7de813f603264..79c0e45e3aa5b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -748,22 +748,44 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// output are the same, or we are using cvt f64->i32 or f32->i64.
if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
- LT.second == MVT::v2f64) &&
- (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits() ||
- (LT.second == MVT::f64 && MTy == MVT::i32) ||
- (LT.second == MVT::f32 && MTy == MVT::i64)))
- return LT.first;
- // Similarly for fp16 sizes
- if (ST->hasFullFP16() &&
- ((LT.second == MVT::f16 && MTy == MVT::i32) ||
- ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
- (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits()))))
+ LT.second == MVT::v2f64)) {
+ if ((LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits() ||
+ (LT.second == MVT::f64 && MTy == MVT::i32) ||
+ (LT.second == MVT::f32 && MTy == MVT::i64)))
+ return LT.first;
+ // Extending vector types v2f32->v2i64, fcvtl*2 + fcvt*2
+ if (LT.second.getScalarType() == MVT::f32 && MTy.isFixedLengthVector() &&
+ MTy.getScalarSizeInBits() == 64)
+ return LT.first * (MTy.getVectorNumElements() > 2 ? 4 : 2);
+ }
+ // Similarly for fp16 sizes. Without FullFP16 we generally need to fcvt to
+ // f32.
+ if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
+ return LT.first + getIntrinsicInstrCost(
+ {ICA.getID(),
+ RetTy,
+ {ICA.getArgTypes()[0]->getWithNewType(
+ Type::getFloatTy(RetTy->getContext()))}},
+ CostKind);
+ if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
+ (LT.second == MVT::f16 && MTy == MVT::i64) ||
+ ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
+ (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits())))
return LT.first;
-
- // Otherwise we use a legal convert followed by a min+max
+ // Extending vector types v8f16->v8i32, fcvtl*2 + fcvt*2
+ if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&
+ MTy.getScalarSizeInBits() == 32)
+ return LT.first * (MTy.getVectorNumElements() > 4 ? 4 : 2);
+ // Extending vector types with f16 elements to 64-bit integer elements. These
+ // currently scalarize but the codegen could be better.
+ if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&
+ MTy.getScalarSizeInBits() == 64)
+ return MTy.getVectorNumElements() * 3;
+
+ // If we can, we use a legal convert followed by a min+max
if ((LT.second.getScalarType() == MVT::f32 ||
LT.second.getScalarType() == MVT::f64 ||
- (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) &&
+ LT.second.getScalarType() == MVT::f16) &&
LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) {
Type *LegalTy =
Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits());
@@ -776,9 +798,33 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax : Intrinsic::umax,
LegalTy, {LegalTy, LegalTy});
Cost += getIntrinsicInstrCost(Attrs2, CostKind);
- return LT.first * Cost;
+ return LT.first * Cost +
+ ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
+ : 1);
}
- break;
+ // Otherwise we need to follow the default expansion that clamps the value
+ // using a float min/max with a fcmp+sel for nan handling when signed.
+ Type *FPTy = ICA.getArgTypes()[0]->getScalarType();
+ RetTy = RetTy->getScalarType();
+ if (LT.second.isVector()) {
+ FPTy = VectorType::get(FPTy, LT.second.getVectorElementCount());
+ RetTy = VectorType::get(RetTy, LT.second.getVectorElementCount());
+ }
+ IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FPTy, {FPTy, FPTy});
+ InstructionCost Cost = getIntrinsicInstrCost(Attrs1, CostKind);
+ IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FPTy, {FPTy, FPTy});
+ Cost += getIntrinsicInstrCost(Attrs2, CostKind);
+ Cost +=
+ getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
+ RetTy, FPTy, TTI::CastContextHint::None, CostKind);
+ if (IsSigned) {
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ Cost += getCmpSelInstrCost(BinaryOperator::FCmp, FPTy, CondTy,
+ CmpInst::FCMP_UNO, CostKind);
+ Cost += getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::FCMP_UNO, CostKind);
+ }
+ return LT.first * Cost;
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
diff --git a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
index e4e29143985b2..c45b6c3c5dcab 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
@@ -34,8 +34,8 @@ define void @casts() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
@@ -54,8 +54,8 @@ define void @casts() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
@@ -74,8 +74,8 @@ define void @casts() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
@@ -94,8 +94,8 @@ define void @casts() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
@@ -223,56 +223,56 @@ define void @casts() {
define void @fp16() {
; CHECK-NOFP16-LABEL: 'fp16'
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 325 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 373 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 281 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 373 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 281 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 342 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 650 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
-; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-FP16-LABEL: 'fp16'
@@ -284,48 +284,48 @@ define void @fp16() {
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
-; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll
index 5fea6f669ead6..2518a30838242 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll
@@ -92,16 +92,10 @@ define i32 @f64_i16(double %in) {
}
define i64 @f16_i32(half %in) {
-; CHECK-FP-LABEL: @f16_i32(
-; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64
-; CHECK-FP-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647)
-; CHECK-FP-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648)
-; CHECK-FP-NEXT: ret i64 [[MAX]]
-;
-; CHECK-FP16-LABEL: @f16_i32(
-; CHECK-FP16-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]])
-; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
-; CHECK-FP16-NEXT: ret i64 [[TMP2]]
+; CHECK-LABEL: @f16_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
;
%conv = fptosi half %in to i64
%min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
@@ -185,16 +179,10 @@ define <8 x i64> @v8f32_i32(<8 x float> %in) {
}
define <4 x i32> @v4f16_i16(<4 x half> %in) {
-; CHECK-FP-LABEL: @v4f16_i16(
-; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32>
-; CHECK-FP-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
-; CHECK-FP-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
-; CHECK-FP-NEXT: ret <4 x i32> [[MAX]]
-;
-; CHECK-FP16-LABEL: @v4f16_i16(
-; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]])
-; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-FP16-NEXT: ret <4 x i32> [[TMP2]]
+; CHECK-LABEL: @v4f16_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%conv = fptosi <4 x half> %in to <4 x i32>
%min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
@@ -203,16 +191,10 @@ define <4 x i32> @v4f16_i16(<4 x half> %in) {
}
define <8 x i32> @v8f16_i16(<8 x half> %in) {
-; CHECK-FP-LABEL: @v8f16_i16(
-; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32>
-; CHECK-FP-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
-; CHECK-FP-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
-; CHECK-FP-NEXT: ret <8 x i32> [[MAX]]
-;
-; CHECK-FP16-LABEL: @v8f16_i16(
-; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]])
-; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32>
-; CHECK-FP16-NEXT: ret <8 x i32> [[TMP2]]
+; CHECK-LABEL: @v8f16_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%conv = fptosi <8 x half> %in to <8 x i32>
%min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
@@ -292,3 +274,6 @@ declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-FP: {{.*}}
+; CHECK-FP16: {{.*}}
More information about the llvm-commits mailing list