[llvm] b52e6c5 - [CostModel]Add cost model for experimental.vector.reverse

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 15 06:25:42 PST 2021


Author: Caroline Concatto
Date: 2021-02-15T14:23:57Z
New Revision: b52e6c58911f60eebd20b59bc89d7a230aececab

URL: https://github.com/llvm/llvm-project/commit/b52e6c58911f60eebd20b59bc89d7a230aececab
DIFF: https://github.com/llvm/llvm-project/commit/b52e6c58911f60eebd20b59bc89d7a230aececab.diff

LOG: [CostModel]Add cost model for experimental.vector.reverse

This patch uses the function getShuffleCost with SK_Reverse to compute the cost
for experimental.vector.reverse.
For scalable vector type, it adds a table with the legal types on
AArch64TTIImpl::getShuffleCost to not assert in BasicTTIImpl::getShuffleCost,
and for fixed vector, it relies on the existing cost model in BasicTTIImpl.

Depends on D94883

Differential Revision: https://reviews.llvm.org/D95603

Added: 
    llvm/test/Analysis/CostModel/AArch64/getIntrinsicInstrCost-vector-reverse.ll
    llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 206fdeab066f..c790448cad21 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1278,6 +1278,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
           TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index,
           cast<VectorType>(Args[1]->getType()));
     }
+    case Intrinsic::experimental_vector_reverse: {
+      return thisT()->getShuffleCost(TTI::SK_Reverse,
+                                     cast<VectorType>(Args[0]->getType()), 0,
+                                     cast<VectorType>(RetTy));
+    }
     case Intrinsic::vector_reduce_add:
     case Intrinsic::vector_reduce_mul:
     case Intrinsic::vector_reduce_and:

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 33607e455d26..e6067ea144c3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1183,7 +1183,8 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
 int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                    int Index, VectorType *SubTp) {
   if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
-      Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
+      Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
+      Kind == TTI::SK_Reverse) {
     static const CostTblEntry ShuffleTbl[] = {
       // Broadcast shuffle kinds can be performed with 'dup'.
       { TTI::SK_Broadcast, MVT::v8i8,  1 },
@@ -1233,6 +1234,15 @@ int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
       { TTI::SK_Broadcast, MVT::nxv8bf16, 1 },
       { TTI::SK_Broadcast, MVT::nxv4f32,  1 },
       { TTI::SK_Broadcast, MVT::nxv2f64,  1 },
+      // Handle the cases for vector.reverse with scalable vectors
+      { TTI::SK_Reverse, MVT::nxv16i8,  1 },
+      { TTI::SK_Reverse, MVT::nxv8i16,  1 },
+      { TTI::SK_Reverse, MVT::nxv4i32,  1 },
+      { TTI::SK_Reverse, MVT::nxv2i64,  1 },
+      { TTI::SK_Reverse, MVT::nxv8f16,  1 },
+      { TTI::SK_Reverse, MVT::nxv8bf16, 1 },
+      { TTI::SK_Reverse, MVT::nxv4f32,  1 },
+      { TTI::SK_Reverse, MVT::nxv2f64,  1 },
     };
     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
     if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))

diff  --git a/llvm/test/Analysis/CostModel/AArch64/getIntrinsicInstrCost-vector-reverse.ll b/llvm/test/Analysis/CostModel/AArch64/getIntrinsicInstrCost-vector-reverse.ll
new file mode 100644
index 000000000000..91d9c04e8204
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/getIntrinsicInstrCost-vector-reverse.ll
@@ -0,0 +1,65 @@
+; Check getIntrinsicInstrCost in BasicTTIImpl.h for vector.reverse
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define void @vector_reverse() #0{
+; CHECK-LABEL: 'vector_reverse':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction:   %1 = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 180 for instruction:   %2 = call <32 x i8> @llvm.experimental.vector.reverse.v32i8(<32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction:   %3 = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction:   %4 = call <16 x i16> @llvm.experimental.vector.reverse.v16i16(<16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction:   %5 = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction:   %6 = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %7 = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction:   %8 = call <4 x i64> @llvm.experimental.vector.reverse.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction:   %9 = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction:   %10 = call <16 x half> @llvm.experimental.vector.reverse.v16f16(<16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction:   %11 = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction:   %12 = call <8 x float> @llvm.experimental.vector.reverse.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %13 = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction:   %14 = call <4 x double> @llvm.experimental.vector.reverse.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction:   %15 = call <8 x bfloat> @llvm.experimental.vector.reverse.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction:   %16 = call <16 x bfloat> @llvm.experimental.vector.reverse.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret void
+
+  call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> undef)
+  call <32 x i8> @llvm.experimental.vector.reverse.v32i8(<32 x i8> undef)
+  call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> undef)
+  call <16 x i16> @llvm.experimental.vector.reverse.v16i16(<16 x i16> undef)
+  call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> undef)
+  call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> undef)
+  call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> undef)
+  call <4 x i64> @llvm.experimental.vector.reverse.v4i64(<4 x i64> undef)
+  call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.experimental.vector.reverse.v16f16(<16 x half> undef)
+  call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> undef)
+  call <8 x float> @llvm.experimental.vector.reverse.v8f32(<8 x float> undef)
+  call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> undef)
+  call <4 x double> @llvm.experimental.vector.reverse.v4f64(<4 x double> undef)
+  call <8 x bfloat> @llvm.experimental.vector.reverse.v8bf16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.experimental.vector.reverse.v16bf16(<16 x bfloat> undef)
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve,+bf16" }
+declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
+declare <32 x i8> @llvm.experimental.vector.reverse.v32i8(<32 x i8>)
+declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
+declare <16 x i16> @llvm.experimental.vector.reverse.v16i16(<16 x i16>)
+declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
+declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
+declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
+declare <4 x i64> @llvm.experimental.vector.reverse.v4i64(<4 x i64>)
+declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
+declare <16 x half> @llvm.experimental.vector.reverse.v16f16(<16 x half>)
+declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
+declare <8 x float> @llvm.experimental.vector.reverse.v8f32(<8 x float>)
+declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
+declare <4 x double> @llvm.experimental.vector.reverse.v4f64(<4 x double>)
+declare <8 x bfloat> @llvm.experimental.vector.reverse.v8bf16(<8 x bfloat>)
+declare <16 x bfloat> @llvm.experimental.vector.reverse.v16bf16(<16 x bfloat>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll
new file mode 100644
index 000000000000..6de1a54a3fe2
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll
@@ -0,0 +1,66 @@
+; Check getIntrinsicInstrCost in BasicTTIImpl.h for vector.reverse
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define void @vector_reverse() #0 {
+; CHECK-LABEL: 'vector_reverse':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %1 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %2 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %3 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %4 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %5 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %6 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %7 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %8 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %9 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %10 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %11 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %12 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %13 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %14 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %15 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret void
+
+  call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
+  call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+  call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
+  call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
+  call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
+  call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
+  call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve,+bf16" }
+
+declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
+declare <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8>)
+declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16>)
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half>)
+declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float>)
+declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double>)
+declare <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat>)
+declare <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat>)


        


More information about the llvm-commits mailing list