[llvm] eaf482f - [AArch64] Tweak truncate costs for some scalable vector types (#119542)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 19 02:07:44 PST 2024


Author: David Sherwood
Date: 2024-12-19T10:07:41Z
New Revision: eaf482f01252a0276a6b422dabe810a1abc7e168

URL: https://github.com/llvm/llvm-project/commit/eaf482f01252a0276a6b422dabe810a1abc7e168
DIFF: https://github.com/llvm/llvm-project/commit/eaf482f01252a0276a6b422dabe810a1abc7e168.diff

LOG: [AArch64] Tweak truncate costs for some scalable vector types (#119542)

== We were previously returning an invalid cost when truncating
anything to <vscale x 2 x i1>, which is incorrect since we can
generate perfectly good code for this.

== The costs for truncating legal or unpacked types to predicates
seemed overly optimistic. For example, when truncating
<vscale x 8 x i16> to <vscale x 8 x i1> we typically do
something like

  and z0.h, z0.h, #0x1
  cmpne   p0.h, p0/z, z0.h, #0

The exact cost might depend upon whether the input value is
generated in the same block and whether we can avoid the
in-register zero-extend. However, it feels safe to take the more
conservative cost here.

== The costs for some truncates such as

  trunc <vscale x 2 x i32> %a to <vscale x 2 x i16>

were 1, whereas in actual fact they are free and no instructions
are required.

== Also, for this

  trunc <vscale x 8 x i32> %a to <vscale x 8 x i16>

it's just a single uzp1 instruction so I reduced the cost to 1.

In general, I've added costs for all cases where the destination
type is legal or unpacked. One unfortunate side effect of this
is that the costs for some fixed-width truncates when using SVE
now look too optimistic.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
    llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
    llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6c2e04c3f8a7c1..817beb8c72a8f6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2782,22 +2782,39 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       {ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4}, // 4 x uzp1
 
       // Truncations on nxvmiN
-      {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1},
-      {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 1},
-      {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 1},
-      {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 1},
-      {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 1},
-      {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 2},
-      {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1},
-      {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3},
-      {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5},
-      {ISD::TRUNCATE, MVT::nxv16i1, MVT::nxv16i8, 1},
-      {ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1},
-      {ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1},
-      {ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1},
-      {ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 2},
-      {ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 3},
-      {ISD::TRUNCATE, MVT::nxv8i32, MVT::nxv8i64, 6},
+      {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i8, 2},
+      {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 2},
+      {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 2},
+      {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 2},
+      {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i8, 2},
+      {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 2},
+      {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 2},
+      {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 5},
+      {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i8, 2},
+      {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 2},
+      {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 5},
+      {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 11},
+      {ISD::TRUNCATE, MVT::nxv16i1, MVT::nxv16i8, 2},
+      {ISD::TRUNCATE, MVT::nxv2i8, MVT::nxv2i16, 0},
+      {ISD::TRUNCATE, MVT::nxv2i8, MVT::nxv2i32, 0},
+      {ISD::TRUNCATE, MVT::nxv2i8, MVT::nxv2i64, 0},
+      {ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 0},
+      {ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i64, 0},
+      {ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 0},
+      {ISD::TRUNCATE, MVT::nxv4i8, MVT::nxv4i16, 0},
+      {ISD::TRUNCATE, MVT::nxv4i8, MVT::nxv4i32, 0},
+      {ISD::TRUNCATE, MVT::nxv4i8, MVT::nxv4i64, 1},
+      {ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 0},
+      {ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i64, 1},
+      {ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 1},
+      {ISD::TRUNCATE, MVT::nxv8i8, MVT::nxv8i16, 0},
+      {ISD::TRUNCATE, MVT::nxv8i8, MVT::nxv8i32, 1},
+      {ISD::TRUNCATE, MVT::nxv8i8, MVT::nxv8i64, 3},
+      {ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 1},
+      {ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i64, 3},
+      {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i16, 1},
+      {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i32, 3},
+      {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i64, 7},
 
       // The number of shll instructions for the extension.
       {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3},

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
index 4b7b1ff7a8b479..0b051169a1b363 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
@@ -418,27 +418,27 @@ define void @trunc() {
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FIXED-MIN-256-LABEL: 'trunc'
@@ -463,19 +463,19 @@ define void @trunc() {
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FIXED-MIN-2048-LABEL: 'trunc'
@@ -500,19 +500,19 @@ define void @trunc() {
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %r8 = trunc i8 undef to i1

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index dd3909ade53159..3e5de313c3cacc 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -638,10 +638,10 @@ define void @vector_splice() #0 {
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Invalid cost for instruction: %splice_nxv1i16_neg = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1)
@@ -671,10 +671,10 @@ define void @vector_splice() #0 {
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Invalid cost for instruction: %splice_nxv1i1_neg = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -702,10 +702,10 @@ define void @vector_splice() #0 {
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Invalid cost for instruction: %splice_nxv1i16_neg = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1)
@@ -735,10 +735,10 @@ define void @vector_splice() #0 {
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Invalid cost for instruction: %splice_nxv1i1_neg = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 -1)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
index 767bd5f5b75cb8..e754d264c1b416 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
@@ -5,43 +5,82 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @sve_truncs() {
 ; CHECK-LABEL: 'sve_truncs'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> undef to <vscale x 2 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> undef to <vscale x 4 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> undef to <vscale x 8 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv4i16_to_i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv4i32_to_i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i64_to_i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv8i16_to_i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv8i32_to_i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i64_to_i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %trunc_v2i16_to_i1  = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
-  %trunc_v2i32_to_i1  = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
-  %trunc_v2i64_to_i1  = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+  %trunc_nxv2i8_to_i1   = trunc <vscale x 2 x i8>  undef to <vscale x 2 x i1>
+  %trunc_nxv2i16_to_i1  = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+  %trunc_nxv2i32_to_i1  = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+  %trunc_nxv2i64_to_i1  = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
 
-  %trunc_v4i16_to_i1  = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
-  %trunc_v4i32_to_i1  = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
-  %trunc_v4i64_to_i1  = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+  %trunc_nxv4i8_to_i1   = trunc <vscale x 4 x i8>  undef to <vscale x 4 x i1>
+  %trunc_nxv4i16_to_i1  = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+  %trunc_nxv4i32_to_i1  = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+  %trunc_nxv4i64_to_i1  = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
 
-  %trunc_v8i16_to_i1  = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
-  %trunc_v8i32_to_i1  = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
-  %trunc_v8i64_to_i1  = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+  %trunc_nxv8i8_to_i1   = trunc <vscale x 8 x i8>  undef to <vscale x 8 x i1>
+  %trunc_nxv8i16_to_i1  = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+  %trunc_nxv8i32_to_i1  = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+  %trunc_nxv8i64_to_i1  = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
 
-  %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
-  %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+; Truncates to unpacked or legal types with vscale x 2 elements
+  %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
+  %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+  %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
+  %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+  %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i16>
+  %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
 
-  %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
-  %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+; Truncates to unpacked or legal types with vscale x 4 elements
+  %trunc_nxv4i16_to_i8  = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
+  %trunc_nxv4i32_to_i8  = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+  %trunc_nxv4i64_to_i8  = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
+  %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+  %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
+  %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
 
-  %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x  8 x i16>
-  %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x  8 x i32>
+; Truncates to unpacked or legal types with vscale x 8 elements
+  %trunc_nxv8i16_to_i8  = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
+  %trunc_nxv8i32_to_i8  = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+  %trunc_nxv8i64_to_i8  = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
+  %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+  %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
+
+; Truncates to unpacked or legal types with vscale x 16 elements
+  %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
+  %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
+  %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
 
   ret void
 }


        


More information about the llvm-commits mailing list