[llvm] 1159984 - [CostModel] Add fptoi_sat costmodel tests. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 25 10:44:40 PDT 2022


Author: David Green
Date: 2022-04-25T18:44:35+01:00
New Revision: 1159984802e736dc85395ffa589bb65c1005b95a

URL: https://github.com/llvm/llvm-project/commit/1159984802e736dc85395ffa589bb65c1005b95a
DIFF: https://github.com/llvm/llvm-project/commit/1159984802e736dc85395ffa589bb65c1005b95a.diff

LOG: [CostModel] Add fptoi_sat costmodel tests. NFC

Added: 
    llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
    llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll
    llvm/test/Analysis/CostModel/X86/fptoi_sat.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
new file mode 100644
index 0000000000000..d562393dee3bc
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
@@ -0,0 +1,498 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64-none-linux-gnueabi %s | FileCheck --check-prefixes=CHECK %s
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64-none-linux-gnueabi -mattr=+fullfp16 %s | FileCheck --check-prefixes=CHECK %s
+
+define void @casts() { ; Cost checks for llvm.fptosi.sat / llvm.fptoui.sat from float and double to i1, i8, i16, i32 and i64: scalar sources first, then 2-, 4-, 8- and 16-element vectors.
+; CHECK-LABEL: 'casts'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+  %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+  %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+  %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+  %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+  %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+  %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+  %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+  %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+  %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+  ; Scalar double source.
+  %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+  %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+  %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+  %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+  %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+  %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+  %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+  %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+  %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+  %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+  ; <2 x float> source.
+  %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+  %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+  %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+  %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+  %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+  %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+  %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+  %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+  %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+  %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+  ; <2 x double> source.
+  %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+  %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+  %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+  %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+  %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+  %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+  %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+  %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+  %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+  %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+  ; <4 x float> source.
+  %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+  %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+  %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+  %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+  %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+  %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+  %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+  %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+  %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+  %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+  ; <4 x double> source.
+  %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+  %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+  %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+  %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+  %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+  %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+  %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+  %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+  %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+  %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+  ; <8 x float> source.
+  %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+  %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+  %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+  %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+  %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+  %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+  %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+  %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+  %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+  %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+  ; <8 x double> source.
+  %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+  %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+  %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+  %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+  %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+  %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+  %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+  %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+  %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+  %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+  ; <16 x float> source.
+  %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+  %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+  %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+  %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+  %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+  %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+  %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+  %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+  %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+  %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+  ; <16 x double> source.
+  %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+  %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+  %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+  %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+  %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+  %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+  %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+  %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+  %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+  %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+
+  ret void
+}
+
+define void @fp16() {
+; CHECK-LABEL: 'fp16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+  %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+  %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+  %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+  %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+  %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+  %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+  %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+  %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+  %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+
+  %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+  %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+  %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+  %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+  %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+  %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+  %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+  %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+  %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+  %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+
+  %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+  %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+  %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+  %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+  %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+  %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+  %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+  %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+  %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+  %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+
+  %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+  %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+  %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+  %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+  %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+  %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+  %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+  %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+  %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+  %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+
+  %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+  %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+  %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+  %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+  %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+  %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+  %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+  %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+  %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+  %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+
+  ret void
+}
+
+
+declare i1 @llvm.fptosi.sat.i1.f32(float)
+declare i1 @llvm.fptoui.sat.i1.f32(float)
+declare i8 @llvm.fptosi.sat.i8.f32(float)
+declare i8 @llvm.fptoui.sat.i8.f32(float)
+declare i16 @llvm.fptosi.sat.i16.f32(float)
+declare i16 @llvm.fptoui.sat.i16.f32(float)
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
+declare i64 @llvm.fptosi.sat.i64.f32(float)
+declare i64 @llvm.fptoui.sat.i64.f32(float)
+
+declare i1 @llvm.fptosi.sat.i1.f64(double)
+declare i1 @llvm.fptoui.sat.i1.f64(double)
+declare i8 @llvm.fptosi.sat.i8.f64(double)
+declare i8 @llvm.fptoui.sat.i8.f64(double)
+declare i16 @llvm.fptosi.sat.i16.f64(double)
+declare i16 @llvm.fptoui.sat.i16.f64(double)
+declare i32 @llvm.fptosi.sat.i32.f64(double)
+declare i32 @llvm.fptoui.sat.i32.f64(double)
+declare i64 @llvm.fptosi.sat.i64.f64(double)
+declare i64 @llvm.fptoui.sat.i64.f64(double)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float>)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double>)
+
+declare i1 @llvm.fptosi.sat.i1.f16(half)
+declare i1 @llvm.fptoui.sat.i1.f16(half)
+declare i8 @llvm.fptosi.sat.i8.f16(half)
+declare i8 @llvm.fptoui.sat.i8.f16(half)
+declare i16 @llvm.fptosi.sat.i16.f16(half)
+declare i16 @llvm.fptoui.sat.i16.f16(half)
+declare i32 @llvm.fptosi.sat.i32.f16(half)
+declare i32 @llvm.fptoui.sat.i32.f16(half)
+declare i64 @llvm.fptosi.sat.i64.f16(half)
+declare i64 @llvm.fptoui.sat.i64.f16(half)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll
new file mode 100644
index 0000000000000..1d81c7b6fe7a2
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll
@@ -0,0 +1,654 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP
+
+define void @casts() {
+; CHECK-MVE-LABEL: 'casts'
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'casts'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+  %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+  %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+  %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+  %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+  %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+  %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+  %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+  %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+  %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+
+  %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+  %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+  %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+  %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+  %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+  %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+  %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+  %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+  %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+  %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+
+  %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+  %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+  %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+  %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+  %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+  %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+  %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+  %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+  %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+  %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+
+  %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+  %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+  %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+  %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+  %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+  %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+  %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+  %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+  %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+  %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+
+  %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+  %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+  %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+  %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+  %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+  %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+  %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+  %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+  %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+  %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+
+  %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+  %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+  %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+  %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+  %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+  %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+  %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+  %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+  %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+  %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+
+  %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+  %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+  %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+  %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+  %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+  %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+  %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+  %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+  %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+  %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+
+  %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+  %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+  %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+  %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+  %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+  %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+  %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+  %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+  %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+  %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+
+  %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+  %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+  %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+  %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+  %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+  %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+  %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+  %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+  %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+  %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+
+  %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+  %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+  %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+  %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+  %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+  %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+  %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+  %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+  %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+  %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+
+  ret void
+}
+
+define void @fp16() {
+; CHECK-MVE-LABEL: 'fp16'
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fp16'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+  %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+  %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+  %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+  %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+  %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+  %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+  %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+  %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+  %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+
+  %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+  %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+  %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+  %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+  %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+  %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+  %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+  %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+  %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+  %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+
+  %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+  %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+  %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+  %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+  %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+  %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+  %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+  %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+  %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+  %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+
+  %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+  %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+  %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+  %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+  %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+  %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+  %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+  %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+  %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+  %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+
+  %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+  %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+  %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+  %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+  %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+  %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+  %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+  %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+  %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+  %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+
+  ret void
+}
+
+
+declare i1 @llvm.fptosi.sat.i1.f32(float)
+declare i1 @llvm.fptoui.sat.i1.f32(float)
+declare i8 @llvm.fptosi.sat.i8.f32(float)
+declare i8 @llvm.fptoui.sat.i8.f32(float)
+declare i16 @llvm.fptosi.sat.i16.f32(float)
+declare i16 @llvm.fptoui.sat.i16.f32(float)
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
+declare i64 @llvm.fptosi.sat.i64.f32(float)
+declare i64 @llvm.fptoui.sat.i64.f32(float)
+
+declare i1 @llvm.fptosi.sat.i1.f64(double)
+declare i1 @llvm.fptoui.sat.i1.f64(double)
+declare i8 @llvm.fptosi.sat.i8.f64(double)
+declare i8 @llvm.fptoui.sat.i8.f64(double)
+declare i16 @llvm.fptosi.sat.i16.f64(double)
+declare i16 @llvm.fptoui.sat.i16.f64(double)
+declare i32 @llvm.fptosi.sat.i32.f64(double)
+declare i32 @llvm.fptoui.sat.i32.f64(double)
+declare i64 @llvm.fptosi.sat.i64.f64(double)
+declare i64 @llvm.fptoui.sat.i64.f64(double)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float>)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double>)
+
+declare i1 @llvm.fptosi.sat.i1.f16(half)
+declare i1 @llvm.fptoui.sat.i1.f16(half)
+declare i8 @llvm.fptosi.sat.i8.f16(half)
+declare i8 @llvm.fptoui.sat.i8.f16(half)
+declare i16 @llvm.fptosi.sat.i16.f16(half)
+declare i16 @llvm.fptoui.sat.i16.f16(half)
+declare i32 @llvm.fptosi.sat.i32.f16(half)
+declare i32 @llvm.fptoui.sat.i32.f16(half)
+declare i64 @llvm.fptosi.sat.i64.f16(half)
+declare i64 @llvm.fptoui.sat.i64.f16(half)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half>)

diff  --git a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll
new file mode 100644
index 0000000000000..60e8c80c94c18
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll
@@ -0,0 +1,1286 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx  | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512
+;
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
+
+define void @casts() {
+; SSE2-LABEL: 'casts'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE42-LABEL: 'casts'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX1-LABEL: 'casts'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'casts'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'casts'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SLM-LABEL: 'casts'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
+  %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
+  %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
+  %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
+  %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
+  %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
+  %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef)
+  %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef)
+  %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef)
+  %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef)
+
+  %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef)
+  %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef)
+  %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef)
+  %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef)
+  %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef)
+  %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef)
+  %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef)
+  %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef)
+  %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef)
+  %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef)
+
+  %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef)
+  %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef)
+  %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef)
+  %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef)
+  %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef)
+  %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
+  %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
+  %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
+  %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
+  %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+
+  %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef)
+  %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef)
+  %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef)
+  %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef)
+  %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef)
+  %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef)
+  %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef)
+  %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef)
+  %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef)
+  %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef)
+
+  %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef)
+  %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef)
+  %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef)
+  %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef)
+  %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef)
+  %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
+  %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
+  %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
+  %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
+  %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
+
+  %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
+  %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
+  %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
+  %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef)
+  %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef)
+  %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
+  %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
+  %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
+  %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
+  %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
+
+  %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
+  %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
+  %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
+  %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef)
+  %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef)
+  %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
+  %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
+  %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
+  %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
+  %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
+
+  %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
+  %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
+  %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
+  %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef)
+  %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef)
+  %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
+  %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
+  %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
+  %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
+  %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
+
+  %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
+  %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
+  %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
+  %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef)
+  %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef)
+  %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
+  %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
+  %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
+  %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
+  %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
+
+  %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
+  %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
+  %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
+  %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef)
+  %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef)
+  %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
+  %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
+  %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
+  %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
+  %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
+
+  ret void
+}
+
+define void @fp16() {
+; SSE2-LABEL: 'fp16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE42-LABEL: 'fp16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX1-LABEL: 'fp16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'fp16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'fp16'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SLM-LABEL: 'fp16'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef)
+  %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef)
+  %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef)
+  %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef)
+  %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef)
+  %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef)
+  %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef)
+  %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef)
+  %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef)
+  %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef)
+
+  %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef)
+  %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef)
+  %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef)
+  %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef)
+  %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef)
+  %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef)
+  %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef)
+  %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef)
+  %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef)
+  %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef)
+
+  %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef)
+  %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef)
+  %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef)
+  %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef)
+  %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef)
+  %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
+  %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
+  %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
+  %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
+  %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
+
+  %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
+  %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
+  %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
+  %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
+  %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
+  %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
+  %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
+  %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
+  %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
+  %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
+
+  %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
+  %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
+  %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
+  %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
+  %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
+  %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
+  %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
+  %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
+  %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
+  %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
+
+  ret void
+}
+
+
+declare i1 @llvm.fptosi.sat.i1.f32(float)
+declare i1 @llvm.fptoui.sat.i1.f32(float)
+declare i8 @llvm.fptosi.sat.i8.f32(float)
+declare i8 @llvm.fptoui.sat.i8.f32(float)
+declare i16 @llvm.fptosi.sat.i16.f32(float)
+declare i16 @llvm.fptoui.sat.i16.f32(float)
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
+declare i64 @llvm.fptosi.sat.i64.f32(float)
+declare i64 @llvm.fptoui.sat.i64.f32(float)
+
+declare i1 @llvm.fptosi.sat.i1.f64(double)
+declare i1 @llvm.fptoui.sat.i1.f64(double)
+declare i8 @llvm.fptosi.sat.i8.f64(double)
+declare i8 @llvm.fptoui.sat.i8.f64(double)
+declare i16 @llvm.fptosi.sat.i16.f64(double)
+declare i16 @llvm.fptoui.sat.i16.f64(double)
+declare i32 @llvm.fptosi.sat.i32.f64(double)
+declare i32 @llvm.fptoui.sat.i32.f64(double)
+declare i64 @llvm.fptosi.sat.i64.f64(double)
+declare i64 @llvm.fptoui.sat.i64.f64(double)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float>)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double>)
+
+declare i1 @llvm.fptosi.sat.i1.f16(half)
+declare i1 @llvm.fptoui.sat.i1.f16(half)
+declare i8 @llvm.fptosi.sat.i8.f16(half)
+declare i8 @llvm.fptoui.sat.i8.f16(half)
+declare i16 @llvm.fptosi.sat.i16.f16(half)
+declare i16 @llvm.fptoui.sat.i16.f16(half)
+declare i32 @llvm.fptosi.sat.i32.f16(half)
+declare i32 @llvm.fptoui.sat.i32.f16(half)
+declare i64 @llvm.fptosi.sat.i64.f16(half)
+declare i64 @llvm.fptoui.sat.i64.f16(half)
+
+declare <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half>)
+declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half>)
+declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half>)
+declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half>)
+declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half>)
+declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half>)
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half>)
+declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half>)
+declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half>)
+
+declare <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half>)
+declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half>)
+declare <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half>)
+declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half>)
+declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half>)
+declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half>)
+declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half>)
+declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half>)
+declare <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half>)
+declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half>)
+
+declare <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half>)
+declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half>)
+declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half>)
+declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half>)
+declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half>)
+declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half>)
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half>)
+declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half>)
+declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half>)
+
+declare <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half>)
+declare <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half>)
+declare <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half>)
+declare <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half>)
+declare <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half>)
+declare <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half>)
+declare <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half>)
+declare <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half>)
+declare <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half>)
+declare <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half>)


        


More information about the llvm-commits mailing list