[llvm] 57ca65e - [AArch64] Add instruction costs for FP_TO_UINT and FP_TO_SINT with half types
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 21 01:39:59 PDT 2021
Author: David Sherwood
Date: 2021-04-21T09:39:45+01:00
New Revision: 57ca65e21e9a48040e929a506a259ee2e4b42088
URL: https://github.com/llvm/llvm-project/commit/57ca65e21e9a48040e929a506a259ee2e4b42088
DIFF: https://github.com/llvm/llvm-project/commit/57ca65e21e9a48040e929a506a259ee2e4b42088.diff
LOG: [AArch64] Add instruction costs for FP_TO_UINT and FP_TO_SINT with half types
We were missing some instruction costs when converting vectors of
half-precision floating-point types into integers, so I've added those here.
I also manually generated assembly code for each FP->int case and
counted the number of instructions generated, which meant
adjusting some of the existing costs too.
I've updated an existing test to reflect the new costs:
Analysis/CostModel/AArch64/sve-fptoi.ll
Differential Revision: https://reviews.llvm.org/D99935
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 990acd9c1b10..a3accfb5ac82 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -586,20 +586,13 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
- // Lowering scalable
+ // Complex, from nxv2f32.
+ { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f32, 1 },
{ ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
- { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
- { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
- { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
- { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
- { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
-
-
- // Complex, from nxv2f32 legal type is nxv2i32 (no cost) or nxv2i64 (1 ext)
- { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
{ ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
{ ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
- { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
{ ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
{ ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
@@ -611,43 +604,75 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
- // Complex, from nxv2f64: legal type is nxv2i32, 1 narrowing => ~2.
- { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
- { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
- { ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
-
- // Complex, from nxv4f32 legal type is nxv4i16, 1 narrowing => ~2
- { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
- { ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
- { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
- { ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
-
- // Complex, from nxv8f64: legal type is nxv8i32, 1 narrowing => ~2.
- { ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
- { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
- { ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
-
- // Complex, from nxv4f64: legal type is nxv4i32, 1 narrowing => ~2.
- { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
- { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
- { ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
- { ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
-
- // Complex, from nxv8f32: legal type is nxv8i32 (no cost) or nxv8i64 (1 ext).
- { ISD::FP_TO_SINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
+ // Complex, from nxv2f64.
+ { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 1 },
+
+ // Complex, from nxv4f32.
+ { ISD::FP_TO_SINT, MVT::nxv4i64, MVT::nxv4f32, 4 },
+ { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv4i64, MVT::nxv4f32, 4 },
+ { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 1 },
+
+ // Complex, from nxv8f64. Illegal -> illegal conversions not required.
+ { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 7 },
+ { ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f64, 7 },
+ { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f64, 7 },
+ { ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f64, 7 },
+
+ // Complex, from nxv4f64. Illegal -> illegal conversions not required.
+ { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f64, 3 },
+ { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f64, 3 },
+ { ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f64, 3 },
+ { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f64, 3 },
+ { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f64, 3 },
+ { ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f64, 3 },
+
+ // Complex, from nxv8f32. Illegal -> illegal conversions not required.
{ ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f32, 3 },
- { ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
- { ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
- { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f32, 1 },
- { ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f32, 3 },
+ { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f32, 3 },
+ { ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f32, 3 },
+
+ // Complex, from nxv8f16.
+ { ISD::FP_TO_SINT, MVT::nxv8i64, MVT::nxv8f16, 10 },
+ { ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f16, 4 },
+ { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f16, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f16, 10 },
+ { ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f16, 4 },
+ { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f16, 1 },
+
+ // Complex, from nxv4f16.
+ { ISD::FP_TO_SINT, MVT::nxv4i64, MVT::nxv4f16, 4 },
+ { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f16, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f16, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv4i64, MVT::nxv4f16, 4 },
+ { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f16, 1 },
+
+ // Complex, from nxv2f16.
+ { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f16, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f16, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f16, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f16, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f16, 1 },
// Truncate from nxvmf32 to nxvmf16.
{ ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f32, 1 },
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
index 68167654a44c..4715eaac47a4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
@@ -4,85 +4,124 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
define void @sve-fptoi() {
- ;CHECK-LABEL: 'sve-fptoi'
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
- ;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
+; CHECK-LABEL: 'sve-fptoi'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_si8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_ui8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_si32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_ui32 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_si64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_ui64 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui16 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_si16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_ui16 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f16_to_si8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f16_to_ui8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f16_to_si32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f16_to_ui32 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv4f16_to_si64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv4f16_to_ui64 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_si16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_ui16 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv4f64_to_si16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv4f64_to_ui16 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f16_to_si8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f16_to_ui8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f16_to_si32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f16_to_ui32 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nv8f16_to_si64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nv8f16_to_ui64 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv8f32_to_si16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nv8f32_to_ui16 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nv8f64_to_si16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nv8f64_to_ui16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+
+ %nv2f16_to_si8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
+ %nv2f16_to_ui8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
+ %nv2f16_to_si32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
+ %nv2f16_to_ui32 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i32>
+ %nv2f16_to_si64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
+ %nv2f16_to_ui64 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i64>
%nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
%nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
- %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
- %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+ %nv2f32_to_si16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
+ %nv2f32_to_ui16 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i16>
%nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
%nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
%nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
%nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
+ %nv2f64_to_si16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
+ %nv2f64_to_ui16 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i16>
%nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
%nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
- %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
- %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
+
+ %nv4f16_to_si8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
+ %nv4f16_to_ui8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
+ %nv4f16_to_si32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
+ %nv4f16_to_ui32 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i32>
+ %nv4f16_to_si64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
+ %nv4f16_to_ui64 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i64>
%nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
%nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
- %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
- %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+ %nv4f32_to_si16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
+ %nv4f32_to_ui16 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i16>
%nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
%nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
%nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
%nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
+ %nv4f64_to_si16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
+ %nv4f64_to_ui16 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i16>
%nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
%nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
- %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
- %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
+
+ %nv8f16_to_si8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
+ %nv8f16_to_ui8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
+ %nv8f16_to_si32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
+ %nv8f16_to_ui32 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i32>
+ %nv8f16_to_si64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
+ %nv8f16_to_ui64 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i64>
%nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
%nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
- %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
- %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+ %nv8f32_to_si16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
+ %nv8f32_to_ui16 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i16>
%nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
%nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
%nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
%nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
+ %nv8f64_to_si16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
+ %nv8f64_to_ui16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
%nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
%nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
- %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
- %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
ret void
}
More information about the llvm-commits mailing list