[llvm] 2cbcf3e - [AArch64][SVE]Add cost model for broadcast shuffle

Wed Feb 3 01:55:58 PST 2021

Author: Caroline Concatto
Date: 2021-02-03T09:53:22Z
New Revision: 2cbcf3e297d1ca210118e21fca6c5e55e22f23be

URL: https://github.com/llvm/llvm-project/commit/2cbcf3e297d1ca210118e21fca6c5e55e22f23be
DIFF: https://github.com/llvm/llvm-project/commit/2cbcf3e297d1ca210118e21fca6c5e55e22f23be.diff

LOG: [AArch64][SVE]Add cost model for broadcast shuffle

This patch adds a cost model for  SK_Broadcast in
AArch64TTIImpl::getShuffleCost with scalable vector.
Without this patch, the scalable vector type relies on  BasicTTIImpl cost
implementation and assert.

Differential Revision: https://reviews.llvm.org/D95598

Added: 
    llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index dc62fa416e37..13ed462c4282 100644

--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1249,6 +1249,15 @@ int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
       { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
       { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
       { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
+      // Broadcast shuffle kinds for scalable vectors
+      { TTI::SK_Broadcast, MVT::nxv16i8,  1 },
+      { TTI::SK_Broadcast, MVT::nxv8i16,  1 },
+      { TTI::SK_Broadcast, MVT::nxv4i32,  1 },
+      { TTI::SK_Broadcast, MVT::nxv2i64,  1 },
+      { TTI::SK_Broadcast, MVT::nxv8f16,  1 },
+      { TTI::SK_Broadcast, MVT::nxv8bf16, 1 },
+      { TTI::SK_Broadcast, MVT::nxv4f32,  1 },
+      { TTI::SK_Broadcast, MVT::nxv2f64,  1 },
     };
     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
     if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll
new file mode 100644
index 000000000000..b2f25ae74f50
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-shuffle-broadcast.ll
@@ -0,0 +1,50 @@
+; Check getShuffleCost for SK_BroadCast with scalable vector
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=sve  < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+
+define void  @broadcast() #0{
+; CHECK-LABEL: 'broadcast':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %zero = shufflevector <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %1 = shufflevector <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %2 = shufflevector <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %3 = shufflevector <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %4 = shufflevector <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %5 = shufflevector <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %6 = shufflevector <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %7 = shufflevector <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %8 = shufflevector <vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %9 = shufflevector <vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %10 = shufflevector <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %11 = shufflevector <vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %12 = shufflevector <vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %13 = shufflevector <vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %14 = shufflevector <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %15 = shufflevector <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NETX: Cost Model: Found an estimated cost of 0 for instruction:   ret void
+
+  %zero = shufflevector <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %1 = shufflevector <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+  %2 = shufflevector <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %3 = shufflevector <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+  %4 = shufflevector <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %5 = shufflevector <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+  %6 = shufflevector <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %7 = shufflevector <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+  %8 = shufflevector <vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %9 = shufflevector <vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i32> zeroinitializer
+ %10 = shufflevector <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %11 = shufflevector <vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
+ %12 = shufflevector <vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+ %13 = shufflevector <vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer
+ %14 = shufflevector <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
+ %15 = shufflevector <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i32> zeroinitializer
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve,+bf16" }