[llvm] [AArch64] Tweak truncate costs for some scalable vector types (PR #119542)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 19 01:16:42 PST 2024
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/119542
>From 8ed0278784fdb95af9ab21abc4d6d8d92f1daefb Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 11 Dec 2024 11:06:19 +0000
Subject: [PATCH 1/3] Add tests
---
.../Analysis/CostModel/AArch64/sve-trunc.ll | 99 +++++++++++++------
1 file changed, 69 insertions(+), 30 deletions(-)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
index 767bd5f5b75cb8..b788c63cc99774 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
@@ -5,43 +5,82 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define void @sve_truncs() {
; CHECK-LABEL: 'sve_truncs'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> undef to <vscale x 2 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> undef to <vscale x 4 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> undef to <vscale x 8 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv4i16_to_i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv4i32_to_i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i64_to_i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv8i16_to_i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv8i32_to_i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i64_to_i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
- %trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
- %trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+ %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> undef to <vscale x 2 x i1>
+ %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+ %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+ %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
- %trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
- %trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
- %trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+ %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> undef to <vscale x 4 x i1>
+ %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+ %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+ %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
- %trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
- %trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
- %trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+ %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> undef to <vscale x 8 x i1>
+ %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+ %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+ %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
- %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
- %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+; Truncates to unpacked or legal types with vscale x 2 elements
+ %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
+ %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+ %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
+ %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+ %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i16>
+ %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
- %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
- %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+; Truncates to unpacked or legal with vscale x 4 elements
+ %trunc_nxv4i16_to_i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
+ %trunc_nxv4i32_to_i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+ %trunc_nxv4i64_to_i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
+ %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+ %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
+ %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
- %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
- %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
+; Truncates to unpacked or legal with vscale x 8 elements
+ %trunc_nxv8i16_to_i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
+ %trunc_nxv8i32_to_i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+ %trunc_nxv8i64_to_i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
+ %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+ %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
+
+; Truncates to unpacked or legal with vscale x 16 elements
+ %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
+ %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
+ %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
ret void
}
>From 5be65518323a0202b6db01652a49063de4f18b25 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 18 Dec 2024 10:17:04 +0000
Subject: [PATCH 2/3] [AArch64] Tweak truncate costs for some scalable vector
types
* We were previously returning an invalid cost when truncating
anything to <vscale x 2 x i1>, which is incorrect since we can
generate perfectly good code for this.
* The costs for truncating legal or unpacked types to predicates
seemed overly optimistic. For example, when truncating
<vscale x 8 x i16> to <vscale x 8 x i1> we typically do
something like
and z0.h, z0.h, #0x1
cmpne p0.h, p0/z, z0.h, #0
I guess it might depend upon whether the input value is
generated in the same block or not and if we can avoid the
inreg zero-extend. However, it feels safe to take the more
conservative cost here.
* The costs for some truncates such as
trunc <vscale x 2 x i32> %a to <vscale x 2 x i16>
were 1, whereas in actual fact they are free and no instructions
are required. Also, for this
trunc <vscale x 8 x i32> %a to <vscale x 8 x i16>
it's just a single uzp1 instruction so I reduced the cost to 1.
---
.../AArch64/AArch64TargetTransformInfo.cpp | 49 +++++++++++++------
.../Analysis/CostModel/AArch64/sve-cast.ll | 36 +++++++-------
.../CostModel/AArch64/sve-intrinsics.ll | 32 ++++++------
.../Analysis/CostModel/AArch64/sve-trunc.ll | 34 ++++++-------
4 files changed, 84 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6c2e04c3f8a7c1..01f5063507f168 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2782,22 +2782,39 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4}, // 4 x uzp1
// Truncations on nxvmiN
- {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1},
- {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 1},
- {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 1},
- {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 1},
- {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 1},
- {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 2},
- {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1},
- {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3},
- {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5},
- {ISD::TRUNCATE, MVT::nxv16i1, MVT::nxv16i8, 1},
- {ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1},
- {ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1},
- {ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1},
- {ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 2},
- {ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 3},
- {ISD::TRUNCATE, MVT::nxv8i32, MVT::nxv8i64, 6},
+ {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i8, 2},
+ {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 2},
+ {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 2},
+ {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 2},
+ {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i8, 2},
+ {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 2},
+ {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 2},
+ {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 5},
+ {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i8, 2},
+ {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 2},
+ {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 5},
+ {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 11},
+ {ISD::TRUNCATE, MVT::nxv16i1, MVT::nxv16i8, 2},
+ {ISD::TRUNCATE, MVT::nxv2i8, MVT::nxv2i16, 0},
+ {ISD::TRUNCATE, MVT::nxv2i8, MVT::nxv2i32, 0},
+ {ISD::TRUNCATE, MVT::nxv2i8, MVT::nxv2i64, 0},
+ {ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 0},
+ {ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i64, 0},
+ {ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 0},
+ {ISD::TRUNCATE, MVT::nxv4i8, MVT::nxv4i16, 0},
+ {ISD::TRUNCATE, MVT::nxv4i8, MVT::nxv4i32, 0},
+ {ISD::TRUNCATE, MVT::nxv4i8, MVT::nxv4i64, 1},
+ {ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 0},
+ {ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i64, 1},
+ {ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 1},
+ {ISD::TRUNCATE, MVT::nxv8i8, MVT::nxv8i16, 0},
+ {ISD::TRUNCATE, MVT::nxv8i8, MVT::nxv8i32, 1},
+ {ISD::TRUNCATE, MVT::nxv8i8, MVT::nxv8i64, 3},
+ {ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 1},
+ {ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i64, 3},
+ {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i16, 1},
+ {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i32, 3},
+ {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i64, 7},
// The number of shll instructions for the extension.
{ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3},
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
index 4b7b1ff7a8b479..0b051169a1b363 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
@@ -418,27 +418,27 @@ define void @trunc() {
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
+; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
+; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
+; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
+; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
+; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
; SVE128-NO-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; FIXED-MIN-256-LABEL: 'trunc'
@@ -463,19 +463,19 @@ define void @trunc() {
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
+; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
+; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; FIXED-MIN-2048-LABEL: 'trunc'
@@ -500,19 +500,19 @@ define void @trunc() {
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
+; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
+; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%r8 = trunc i8 undef to i1
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index dd3909ade53159..3e5de313c3cacc 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -638,10 +638,10 @@ define void @vector_splice() #0 {
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1)
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv1i16_neg = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1)
@@ -671,10 +671,10 @@ define void @vector_splice() #0 {
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv1i1_neg = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 -1)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
@@ -702,10 +702,10 @@ define void @vector_splice() #0 {
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1)
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv1i16_neg = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1)
@@ -735,10 +735,10 @@ define void @vector_splice() #0 {
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv1i1_neg = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 -1)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
index b788c63cc99774..e754d264c1b416 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
@@ -5,34 +5,34 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define void @sve_truncs() {
; CHECK-LABEL: 'sve_truncs'
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> undef to <vscale x 2 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> undef to <vscale x 4 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> undef to <vscale x 8 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> undef to <vscale x 2 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> undef to <vscale x 4 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> undef to <vscale x 8 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv4i16_to_i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv4i32_to_i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i64_to_i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc_nxv8i16_to_i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv8i32_to_i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i64_to_i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
>From 58212a10d03cce62300ef6866b47d2e9a21aa771 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 18 Dec 2024 16:12:17 +0000
Subject: [PATCH 3/3] Fix formatting
---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 01f5063507f168..817beb8c72a8f6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2782,15 +2782,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4}, // 4 x uzp1
// Truncations on nxvmiN
- {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i8, 2},
+ {ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i8, 2},
{ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 2},
{ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 2},
{ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 2},
- {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i8, 2},
+ {ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i8, 2},
{ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 2},
{ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 2},
{ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 5},
- {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i8, 2},
+ {ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i8, 2},
{ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 2},
{ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 5},
{ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 11},
More information about the llvm-commits
mailing list