[llvm] 9199b65 - [CostModel][AArch64] Add missing costs for getShuffleCost with scalable vectors

Thu May 20 01:16:31 PDT 2021

Author: Caroline Concatto
Date: 2021-05-20T09:08:31+01:00
New Revision: 9199b6535df17c719c980beb3883c956f9d5f809

URL: https://github.com/llvm/llvm-project/commit/9199b6535df17c719c980beb3883c956f9d5f809
DIFF: https://github.com/llvm/llvm-project/commit/9199b6535df17c719c980beb3883c956f9d5f809.diff

LOG: [CostModel][AArch64] Add missing costs for getShuffleCost with scalable vectors

Differential Revision: https://reviews.llvm.org/D102490

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
    llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 1293d455aba38..90762052dc3a7 100644

--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1678,17 +1678,31 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       { TTI::SK_Broadcast, MVT::nxv8i16,  1 },
       { TTI::SK_Broadcast, MVT::nxv4i32,  1 },
       { TTI::SK_Broadcast, MVT::nxv2i64,  1 },
+      { TTI::SK_Broadcast, MVT::nxv2f16,  1 },
+      { TTI::SK_Broadcast, MVT::nxv4f16,  1 },
       { TTI::SK_Broadcast, MVT::nxv8f16,  1 },
+      { TTI::SK_Broadcast, MVT::nxv2bf16, 1 },
+      { TTI::SK_Broadcast, MVT::nxv4bf16, 1 },
       { TTI::SK_Broadcast, MVT::nxv8bf16, 1 },
+      { TTI::SK_Broadcast, MVT::nxv2f32,  1 },
       { TTI::SK_Broadcast, MVT::nxv4f32,  1 },
       { TTI::SK_Broadcast, MVT::nxv2f64,  1 },
+      { TTI::SK_Broadcast, MVT::nxv16i1,  1 },
+      { TTI::SK_Broadcast, MVT::nxv8i1,   1 },
+      { TTI::SK_Broadcast, MVT::nxv4i1,   1 },
+      { TTI::SK_Broadcast, MVT::nxv2i1,   1 },
       // Handle the cases for vector.reverse with scalable vectors
       { TTI::SK_Reverse, MVT::nxv16i8,  1 },
       { TTI::SK_Reverse, MVT::nxv8i16,  1 },
       { TTI::SK_Reverse, MVT::nxv4i32,  1 },
       { TTI::SK_Reverse, MVT::nxv2i64,  1 },
+      { TTI::SK_Reverse, MVT::nxv2f16,  1 },
+      { TTI::SK_Reverse, MVT::nxv4f16,  1 },
       { TTI::SK_Reverse, MVT::nxv8f16,  1 },
+      { TTI::SK_Reverse, MVT::nxv2bf16, 1 },
+      { TTI::SK_Reverse, MVT::nxv4bf16, 1 },
       { TTI::SK_Reverse, MVT::nxv8bf16, 1 },
+      { TTI::SK_Reverse, MVT::nxv2f32,  1 },
       { TTI::SK_Reverse, MVT::nxv4f32,  1 },
       { TTI::SK_Reverse, MVT::nxv2f64,  1 },
       { TTI::SK_Reverse, MVT::nxv16i1,  1 },

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index f0c11e2442036..1877b190ae714 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -114,18 +114,25 @@ define void @vector_reverse() #0 {
 ; CHECK-LABEL: 'vector_reverse':
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8if16 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.experimental.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.experimental.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
@@ -135,18 +142,25 @@ define void @vector_reverse() #0 {
 
   %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
   %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+  %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
+  %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
   %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
   %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
   %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
   %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
   %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
   %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-  %reverse_nxv8if16 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
+  %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> undef)
+  %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> undef)
+  %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
   %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
+  %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> undef)
   %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
   %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
   %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
   %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
+  %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.experimental.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef)
+  %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.experimental.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef)
   %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
   %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
   %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
@@ -157,18 +171,25 @@ define void @vector_reverse() #0 {
 }
 declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
 declare <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8>)
+declare <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16>)
 declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
 declare <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16>)
 declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
 declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
 declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
 declare <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half>)
+declare <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half>)
 declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
 declare <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half>)
+declare <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float>)
 declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
 declare <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float>)
 declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double>)
+declare <vscale x 2 x bfloat> @llvm.experimental.vector.reverse.nxv2bf16(<vscale x 2 x bfloat>)
+declare <vscale x 4 x bfloat> @llvm.experimental.vector.reverse.nxv4bf16(<vscale x 4 x bfloat>)
 declare <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat>)
 declare <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat>)
 declare <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll
index e94cbb23c53ae..25f7177d908c6 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll
@@ -12,14 +12,23 @@ define void  @broadcast() #0{
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %5 = shufflevector <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %6 = shufflevector <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %7 = shufflevector <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %8 = shufflevector <vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %9 = shufflevector <vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %10 = shufflevector <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %11 = shufflevector <vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %12 = shufflevector <vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %13 = shufflevector <vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %14 = shufflevector <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %15 = shufflevector <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %8 = shufflevector <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %9 = shufflevector <vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %10 = shufflevector <vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %11 = shufflevector <vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %12 = shufflevector <vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %13 = shufflevector <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %14 = shufflevector <vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %15 = shufflevector <vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %16 = shufflevector <vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %17 = shufflevector <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %18 = shufflevector <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %19 = shufflevector <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %20 = shufflevector <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %21 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %22 = shufflevector <vscale x 8 x i1> undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %23 = shufflevector <vscale x 4 x i1> undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %24 = shufflevector <vscale x 2 x i1> undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NETX: Cost Model: Found an estimated cost of 0 for instruction:   ret void
 
   %zero = shufflevector <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -30,14 +39,23 @@ define void  @broadcast() #0{
   %5 = shufflevector <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
   %6 = shufflevector <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %7 = shufflevector <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
-  %8 = shufflevector <vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
-  %9 = shufflevector <vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i32> zeroinitializer
- %10 = shufflevector <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
-  %11 = shufflevector <vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
- %12 = shufflevector <vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
- %13 = shufflevector <vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer
- %14 = shufflevector <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
- %15 = shufflevector <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i32> zeroinitializer
+  %8 = shufflevector <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %9 = shufflevector <vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %10 = shufflevector <vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %11 = shufflevector <vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i32> zeroinitializer
+ %12 = shufflevector <vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+ %13 = shufflevector <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+ %14 = shufflevector <vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
+ %15 = shufflevector <vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+ %16 = shufflevector <vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer
+ %17 = shufflevector <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i32> zeroinitializer
+ %18 = shufflevector <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i32> zeroinitializer
+ %19 = shufflevector <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
+ %20 = shufflevector <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i32> zeroinitializer
+ %21 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %22 = shufflevector <vscale x 8 x i1> undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %23 = shufflevector <vscale x 4 x i1> undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %24 = shufflevector <vscale x 2 x i1> undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   ret void
 }