[llvm] 19601a4 - [SVE][Analysis]Instruction costs for ops on scalable-vec

Nashe Mncube via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 29 03:16:04 PDT 2021


Author: Nashe Mncube
Date: 2021-03-29T11:15:50+01:00
New Revision: 19601a4c6c4887e167dabf64bb2393dd1c399fe9

URL: https://github.com/llvm/llvm-project/commit/19601a4c6c4887e167dabf64bb2393dd1c399fe9
DIFF: https://github.com/llvm/llvm-project/commit/19601a4c6c4887e167dabf64bb2393dd1c399fe9.diff

LOG: [SVE][Analysis]Instruction costs for ops on scalable-vec

The following operations have no associated cost when
applied to scalable vectors and, as a consequence, can
trigger a crash when a call is made to
AArch64TTIImpl::getCastInstrCost():
- fptrunc
- trunc
- fpext
- fpto(u,s)i

This patch adds costs for these operations, along with
the relevant regression tests.

Differential Revision: https://reviews.llvm.org/D98934

Added: 
    llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
    llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
    llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d199213e140d..d6eb3c6b8e56 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -391,6 +391,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::TRUNCATE, MVT::v8i8,  MVT::v8i32,  3 },
     { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
 
+    // Truncations on nxvmiN
+    { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1 },
+    { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 1 },
+    { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 2 },
+    { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1 },
+    { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3 },
+    { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5 },
+    { ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1 },
+    { ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 2 },
+    { ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 3 },
+    { ISD::TRUNCATE, MVT::nxv8i32, MVT::nxv8i64, 6 },
+
     // The number of shll instructions for the extension.
     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
@@ -472,6 +489,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
     { ISD::FP_TO_UINT, MVT::v4i8,  MVT::v4f32, 2 },
 
+    // Scalable vectors: fp-to-int with matching element widths (cost 1).
+    { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
+    { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
+    { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
+
+
+    // Complex, from nxv2f32: legal type is nxv2i32 (no cost) or nxv2i64 (1 ext).
+    { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
+    { ISD::FP_TO_SINT, MVT::nxv2i8,  MVT::nxv2f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv2i8,  MVT::nxv2f32, 1 },
+
     // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
     { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
     { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
@@ -479,6 +513,75 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
     { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
     { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f64, 2 },
+
+    // Complex, from nxv2f64: legal type is nxv2i32, 1 narrowing => ~2.
+    { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv2i8,  MVT::nxv2f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv2i8,  MVT::nxv2f64, 2 },
+
+    // Complex, from nxv4f32: legal type is nxv4i16, 1 narrowing => ~2.
+    { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv4i8,  MVT::nxv4f32, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i8,  MVT::nxv4f32, 2 },
+
+    // Complex, from nxv8f64: legal type is nxv8i32, 1 narrowing => ~2.
+    { ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv8i8,  MVT::nxv8f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv8i8,  MVT::nxv8f64, 2 },
+
+    // Complex, from nxv4f64: legal type is nxv4i32, 1 narrowing => ~2.
+    { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv4i8,  MVT::nxv4f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i8,  MVT::nxv4f64, 2 },
+
+    // Complex, from nxv8f32: legal type is nxv8i32 (no cost) or nxv8i64 (1 ext).
+    { ISD::FP_TO_SINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f32, 3 },
+    { ISD::FP_TO_SINT, MVT::nxv8i8,  MVT::nxv8f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv8i8,  MVT::nxv8f32, 1 },
+
+    // Truncate from nxvmf32 to nxvmf16.
+    { ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f32, 1 },
+    { ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1 },
+    { ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3 },
+
+    // Truncate from nxvmf64 to nxvmf16.
+    { ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1 },
+    { ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3 },
+    { ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7 },
+
+    // Truncate from nxvmf64 to nxvmf32.
+    { ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1 },
+    { ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3 },
+    { ISD::FP_ROUND, MVT::nxv8f32, MVT::nxv8f64, 6 },
+
+    // Extend from nxvmf16 to nxvmf32.
+    { ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
+    { ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
+    { ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
+
+    // Extend from nxvmf16 to nxvmf64.
+    { ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
+    { ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
+    { ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
+
+    // Extend from nxvmf32 to nxvmf64.
+    { ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
+    { ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
+    { ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
+
   };
 
   if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
new file mode 100644
index 000000000000..868092fcc8d6
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
@@ -0,0 +1,33 @@
+; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve_fpext() {
+  ;CHECK-LABEL: 'sve_fpext'
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+  %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
+  %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
+  %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+
+  %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
+  %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
+  %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+
+  %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+  %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+  %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
new file mode 100644
index 000000000000..56e3c0b639fa
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
@@ -0,0 +1,91 @@
+; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -o - -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve-fptoi() {
+  ;CHECK-LABEL: 'sve-fptoi'
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction:   %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction:   %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
+
+  %nv2f32_to_si8  = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
+  %nv2f32_to_ui8  = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
+  %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+  %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+  %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
+  %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
+
+  %nv2f64_to_si8  = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
+  %nv2f64_to_ui8  = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
+  %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
+  %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
+  %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
+  %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
+
+  %nv4f32_to_si8  = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
+  %nv4f32_to_ui8  = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
+  %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+  %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+  %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
+  %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
+
+  %nv4f64_to_si8  = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
+  %nv4f64_to_ui8  = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
+  %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
+  %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
+  %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
+  %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
+
+  %nv8f32_to_si8  = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
+  %nv8f32_to_ui8  = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
+  %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+  %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+  %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
+  %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
+
+  %nv8f64_to_si8  = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
+  %nv8f64_to_ui8  = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
+  %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
+  %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+  %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
+  %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
+
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
new file mode 100644
index 000000000000..dc6dc0d434f3
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
@@ -0,0 +1,33 @@
+; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s  2>%t| FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve_fptruncs() {
+  ;CHECK-LABEL: 'sve_fptruncs'
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction:   %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+  %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
+  %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
+  %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
+
+  %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
+  %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
+  %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
+
+  %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+  %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+  %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
index 32e760f2015d..d772b24c3a68 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
@@ -4,11 +4,43 @@
 ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
 ; WARN-NOT: warning
 
-; CHECK: Found an estimated cost of 0 for instruction:   %0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
+define void @sve_truncs() {
+  ;CHECK-LABEL: 'sve_truncs'
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction:   %trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
+  %trunc_v2i16_to_i1  = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+  %trunc_v2i32_to_i1  = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+  %trunc_v2i64_to_i1  = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+
+  %trunc_v4i16_to_i1  = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+  %trunc_v4i32_to_i1  = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+  %trunc_v4i64_to_i1  = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+
+  %trunc_v8i16_to_i1  = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+  %trunc_v8i32_to_i1  = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+  %trunc_v8i64_to_i1  = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+
+  %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+  %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+
+  %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+  %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+
+  %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x  8 x i16>
+  %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x  8 x i32>
 
-define void @trunc_nxv2i64_to_nxv2i32(<vscale x 2 x i32>* %ptr, <vscale x 2 x i64> %v) {
-entry:
-  %0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
-  store <vscale x 2 x i32> %0, <vscale x 2 x i32>* %ptr
   ret void
 }


        


More information about the llvm-commits mailing list