[llvm] 723245b - [AARCH64][COST] Improve cost of reverse shuffles for AArch64

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 8 03:56:19 PDT 2022


Author: liqinweng
Date: 2022-09-08T18:55:49+08:00
New Revision: 723245bfac1ad0412ecdb012f284d1b920373cdb

URL: https://github.com/llvm/llvm-project/commit/723245bfac1ad0412ecdb012f284d1b920373cdb
DIFF: https://github.com/llvm/llvm-project/commit/723245bfac1ad0412ecdb012f284d1b920373cdb.diff

LOG: [AARCH64][COST] Improve cost of reverse shuffles for AArch64

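Update the comments for reverse shuffles in the AArch64 shuffle cost table (the cost values themselves are unchanged) and add NEON codegen tests for the v2i16, v2i32 and v2f32 reverses.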

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D132730

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6bdd89d7fa8b6..5b8e236c77c1f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2929,116 +2929,117 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
       Kind == TTI::SK_Reverse || Kind == TTI::SK_Splice) {
     static const CostTblEntry ShuffleTbl[] = {
-      // Broadcast shuffle kinds can be performed with 'dup'.
-      { TTI::SK_Broadcast, MVT::v8i8,  1 },
-      { TTI::SK_Broadcast, MVT::v16i8, 1 },
-      { TTI::SK_Broadcast, MVT::v4i16, 1 },
-      { TTI::SK_Broadcast, MVT::v8i16, 1 },
-      { TTI::SK_Broadcast, MVT::v2i32, 1 },
-      { TTI::SK_Broadcast, MVT::v4i32, 1 },
-      { TTI::SK_Broadcast, MVT::v2i64, 1 },
-      { TTI::SK_Broadcast, MVT::v2f32, 1 },
-      { TTI::SK_Broadcast, MVT::v4f32, 1 },
-      { TTI::SK_Broadcast, MVT::v2f64, 1 },
-      // Transpose shuffle kinds can be performed with 'trn1/trn2' and
-      // 'zip1/zip2' instructions.
-      { TTI::SK_Transpose, MVT::v8i8,  1 },
-      { TTI::SK_Transpose, MVT::v16i8, 1 },
-      { TTI::SK_Transpose, MVT::v4i16, 1 },
-      { TTI::SK_Transpose, MVT::v8i16, 1 },
-      { TTI::SK_Transpose, MVT::v2i32, 1 },
-      { TTI::SK_Transpose, MVT::v4i32, 1 },
-      { TTI::SK_Transpose, MVT::v2i64, 1 },
-      { TTI::SK_Transpose, MVT::v2f32, 1 },
-      { TTI::SK_Transpose, MVT::v4f32, 1 },
-      { TTI::SK_Transpose, MVT::v2f64, 1 },
-      // Select shuffle kinds.
-      // TODO: handle vXi8/vXi16.
-      { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
-      { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
-      { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
-      { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
-      { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
-      { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
-      // PermuteSingleSrc shuffle kinds.
-      { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
-      { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
-      { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
-      { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
-      { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
-      { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
-      { TTI::SK_PermuteSingleSrc, MVT::v4i16, 3 }, // perfectshuffle worst case.
-      { TTI::SK_PermuteSingleSrc, MVT::v4f16, 3 }, // perfectshuffle worst case.
-      { TTI::SK_PermuteSingleSrc, MVT::v4bf16, 3 }, // perfectshuffle worst case.
-      { TTI::SK_PermuteSingleSrc, MVT::v8i16, 8 }, // constpool + load + tbl
-      { TTI::SK_PermuteSingleSrc, MVT::v8f16, 8 }, // constpool + load + tbl
-      { TTI::SK_PermuteSingleSrc, MVT::v8bf16, 8 }, // constpool + load + tbl
-      { TTI::SK_PermuteSingleSrc, MVT::v8i8, 8 }, // constpool + load + tbl
-      { TTI::SK_PermuteSingleSrc, MVT::v16i8, 8 }, // constpool + load + tbl
-      // Reverse can be lowered with `rev`.
-      { TTI::SK_Reverse, MVT::v2i32, 1 }, // mov.
-      { TTI::SK_Reverse, MVT::v4i32, 2 }, // REV64; EXT
-      { TTI::SK_Reverse, MVT::v2i64, 1 }, // mov.
-      { TTI::SK_Reverse, MVT::v2f32, 1 }, // mov.
-      { TTI::SK_Reverse, MVT::v4f32, 2 }, // REV64; EXT
-      { TTI::SK_Reverse, MVT::v2f64, 1 }, // mov.
-      { TTI::SK_Reverse, MVT::v8f16, 2 }, // REV64; EXT
-      { TTI::SK_Reverse, MVT::v8i16, 2 }, // REV64; EXT
-      { TTI::SK_Reverse, MVT::v16i8, 2 }, // REV64; EXT
-      { TTI::SK_Reverse, MVT::v4f16, 1 }, // REV64
-      { TTI::SK_Reverse, MVT::v4i16, 1 }, // REV64
-      { TTI::SK_Reverse, MVT::v8i8, 1 }, // REV64
-      // Splice can all be lowered as `ext`.
-      { TTI::SK_Splice, MVT::v2i32, 1 },
-      { TTI::SK_Splice, MVT::v4i32, 1 },
-      { TTI::SK_Splice, MVT::v2i64, 1 },
-      { TTI::SK_Splice, MVT::v2f32, 1 },
-      { TTI::SK_Splice, MVT::v4f32, 1 },
-      { TTI::SK_Splice, MVT::v2f64, 1 },
-      { TTI::SK_Splice, MVT::v8f16, 1 },
-      { TTI::SK_Splice, MVT::v8bf16, 1 },
-      { TTI::SK_Splice, MVT::v8i16, 1 },
-      { TTI::SK_Splice, MVT::v16i8, 1 },
-      { TTI::SK_Splice, MVT::v4bf16, 1 },
-      { TTI::SK_Splice, MVT::v4f16, 1 },
-      { TTI::SK_Splice, MVT::v4i16, 1 },
-      { TTI::SK_Splice, MVT::v8i8, 1 },
-      // Broadcast shuffle kinds for scalable vectors
-      { TTI::SK_Broadcast, MVT::nxv16i8,  1 },
-      { TTI::SK_Broadcast, MVT::nxv8i16,  1 },
-      { TTI::SK_Broadcast, MVT::nxv4i32,  1 },
-      { TTI::SK_Broadcast, MVT::nxv2i64,  1 },
-      { TTI::SK_Broadcast, MVT::nxv2f16,  1 },
-      { TTI::SK_Broadcast, MVT::nxv4f16,  1 },
-      { TTI::SK_Broadcast, MVT::nxv8f16,  1 },
-      { TTI::SK_Broadcast, MVT::nxv2bf16, 1 },
-      { TTI::SK_Broadcast, MVT::nxv4bf16, 1 },
-      { TTI::SK_Broadcast, MVT::nxv8bf16, 1 },
-      { TTI::SK_Broadcast, MVT::nxv2f32,  1 },
-      { TTI::SK_Broadcast, MVT::nxv4f32,  1 },
-      { TTI::SK_Broadcast, MVT::nxv2f64,  1 },
-      { TTI::SK_Broadcast, MVT::nxv16i1,  1 },
-      { TTI::SK_Broadcast, MVT::nxv8i1,   1 },
-      { TTI::SK_Broadcast, MVT::nxv4i1,   1 },
-      { TTI::SK_Broadcast, MVT::nxv2i1,   1 },
-      // Handle the cases for vector.reverse with scalable vectors
-      { TTI::SK_Reverse, MVT::nxv16i8,  1 },
-      { TTI::SK_Reverse, MVT::nxv8i16,  1 },
-      { TTI::SK_Reverse, MVT::nxv4i32,  1 },
-      { TTI::SK_Reverse, MVT::nxv2i64,  1 },
-      { TTI::SK_Reverse, MVT::nxv2f16,  1 },
-      { TTI::SK_Reverse, MVT::nxv4f16,  1 },
-      { TTI::SK_Reverse, MVT::nxv8f16,  1 },
-      { TTI::SK_Reverse, MVT::nxv2bf16, 1 },
-      { TTI::SK_Reverse, MVT::nxv4bf16, 1 },
-      { TTI::SK_Reverse, MVT::nxv8bf16, 1 },
-      { TTI::SK_Reverse, MVT::nxv2f32,  1 },
-      { TTI::SK_Reverse, MVT::nxv4f32,  1 },
-      { TTI::SK_Reverse, MVT::nxv2f64,  1 },
-      { TTI::SK_Reverse, MVT::nxv16i1,  1 },
-      { TTI::SK_Reverse, MVT::nxv8i1,   1 },
-      { TTI::SK_Reverse, MVT::nxv4i1,   1 },
-      { TTI::SK_Reverse, MVT::nxv2i1,   1 },
+        // Broadcast shuffle kinds can be performed with 'dup'.
+        {TTI::SK_Broadcast, MVT::v8i8, 1},
+        {TTI::SK_Broadcast, MVT::v16i8, 1},
+        {TTI::SK_Broadcast, MVT::v4i16, 1},
+        {TTI::SK_Broadcast, MVT::v8i16, 1},
+        {TTI::SK_Broadcast, MVT::v2i32, 1},
+        {TTI::SK_Broadcast, MVT::v4i32, 1},
+        {TTI::SK_Broadcast, MVT::v2i64, 1},
+        {TTI::SK_Broadcast, MVT::v2f32, 1},
+        {TTI::SK_Broadcast, MVT::v4f32, 1},
+        {TTI::SK_Broadcast, MVT::v2f64, 1},
+        // Transpose shuffle kinds can be performed with 'trn1/trn2' and
+        // 'zip1/zip2' instructions.
+        {TTI::SK_Transpose, MVT::v8i8, 1},
+        {TTI::SK_Transpose, MVT::v16i8, 1},
+        {TTI::SK_Transpose, MVT::v4i16, 1},
+        {TTI::SK_Transpose, MVT::v8i16, 1},
+        {TTI::SK_Transpose, MVT::v2i32, 1},
+        {TTI::SK_Transpose, MVT::v4i32, 1},
+        {TTI::SK_Transpose, MVT::v2i64, 1},
+        {TTI::SK_Transpose, MVT::v2f32, 1},
+        {TTI::SK_Transpose, MVT::v4f32, 1},
+        {TTI::SK_Transpose, MVT::v2f64, 1},
+        // Select shuffle kinds.
+        // TODO: handle vXi8/vXi16.
+        {TTI::SK_Select, MVT::v2i32, 1}, // mov.
+        {TTI::SK_Select, MVT::v4i32, 2}, // rev+trn (or similar).
+        {TTI::SK_Select, MVT::v2i64, 1}, // mov.
+        {TTI::SK_Select, MVT::v2f32, 1}, // mov.
+        {TTI::SK_Select, MVT::v4f32, 2}, // rev+trn (or similar).
+        {TTI::SK_Select, MVT::v2f64, 1}, // mov.
+        // PermuteSingleSrc shuffle kinds.
+        {TTI::SK_PermuteSingleSrc, MVT::v2i32, 1}, // mov.
+        {TTI::SK_PermuteSingleSrc, MVT::v4i32, 3}, // perfectshuffle worst case.
+        {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // mov.
+        {TTI::SK_PermuteSingleSrc, MVT::v2f32, 1}, // mov.
+        {TTI::SK_PermuteSingleSrc, MVT::v4f32, 3}, // perfectshuffle worst case.
+        {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // mov.
+        {TTI::SK_PermuteSingleSrc, MVT::v4i16, 3}, // perfectshuffle worst case.
+        {TTI::SK_PermuteSingleSrc, MVT::v4f16, 3}, // perfectshuffle worst case.
+        {TTI::SK_PermuteSingleSrc, MVT::v4bf16,
+         3},                                       // perfectshuffle worst case.
+        {TTI::SK_PermuteSingleSrc, MVT::v8i16, 8}, // constpool + load + tbl
+        {TTI::SK_PermuteSingleSrc, MVT::v8f16, 8}, // constpool + load + tbl
+        {TTI::SK_PermuteSingleSrc, MVT::v8bf16, 8}, // constpool + load + tbl
+        {TTI::SK_PermuteSingleSrc, MVT::v8i8, 8},   // constpool + load + tbl
+        {TTI::SK_PermuteSingleSrc, MVT::v16i8, 8},  // constpool + load + tbl
+        // Reverse can be lowered with `rev`.
+        {TTI::SK_Reverse, MVT::v2i32, 1}, // REV64
+        {TTI::SK_Reverse, MVT::v4i32, 2}, // REV64; EXT
+        {TTI::SK_Reverse, MVT::v2i64, 1}, // EXT
+        {TTI::SK_Reverse, MVT::v2f32, 1}, // REV64
+        {TTI::SK_Reverse, MVT::v4f32, 2}, // REV64; EXT
+        {TTI::SK_Reverse, MVT::v2f64, 1}, // EXT
+        {TTI::SK_Reverse, MVT::v8f16, 2}, // REV64; EXT
+        {TTI::SK_Reverse, MVT::v8i16, 2}, // REV64; EXT
+        {TTI::SK_Reverse, MVT::v16i8, 2}, // REV64; EXT
+        {TTI::SK_Reverse, MVT::v4f16, 1}, // REV64
+        {TTI::SK_Reverse, MVT::v4i16, 1}, // REV64
+        {TTI::SK_Reverse, MVT::v8i8, 1},  // REV64
+        // Splice can all be lowered as `ext`.
+        {TTI::SK_Splice, MVT::v2i32, 1},
+        {TTI::SK_Splice, MVT::v4i32, 1},
+        {TTI::SK_Splice, MVT::v2i64, 1},
+        {TTI::SK_Splice, MVT::v2f32, 1},
+        {TTI::SK_Splice, MVT::v4f32, 1},
+        {TTI::SK_Splice, MVT::v2f64, 1},
+        {TTI::SK_Splice, MVT::v8f16, 1},
+        {TTI::SK_Splice, MVT::v8bf16, 1},
+        {TTI::SK_Splice, MVT::v8i16, 1},
+        {TTI::SK_Splice, MVT::v16i8, 1},
+        {TTI::SK_Splice, MVT::v4bf16, 1},
+        {TTI::SK_Splice, MVT::v4f16, 1},
+        {TTI::SK_Splice, MVT::v4i16, 1},
+        {TTI::SK_Splice, MVT::v8i8, 1},
+        // Broadcast shuffle kinds for scalable vectors
+        {TTI::SK_Broadcast, MVT::nxv16i8, 1},
+        {TTI::SK_Broadcast, MVT::nxv8i16, 1},
+        {TTI::SK_Broadcast, MVT::nxv4i32, 1},
+        {TTI::SK_Broadcast, MVT::nxv2i64, 1},
+        {TTI::SK_Broadcast, MVT::nxv2f16, 1},
+        {TTI::SK_Broadcast, MVT::nxv4f16, 1},
+        {TTI::SK_Broadcast, MVT::nxv8f16, 1},
+        {TTI::SK_Broadcast, MVT::nxv2bf16, 1},
+        {TTI::SK_Broadcast, MVT::nxv4bf16, 1},
+        {TTI::SK_Broadcast, MVT::nxv8bf16, 1},
+        {TTI::SK_Broadcast, MVT::nxv2f32, 1},
+        {TTI::SK_Broadcast, MVT::nxv4f32, 1},
+        {TTI::SK_Broadcast, MVT::nxv2f64, 1},
+        {TTI::SK_Broadcast, MVT::nxv16i1, 1},
+        {TTI::SK_Broadcast, MVT::nxv8i1, 1},
+        {TTI::SK_Broadcast, MVT::nxv4i1, 1},
+        {TTI::SK_Broadcast, MVT::nxv2i1, 1},
+        // Handle the cases for vector.reverse with scalable vectors
+        {TTI::SK_Reverse, MVT::nxv16i8, 1},
+        {TTI::SK_Reverse, MVT::nxv8i16, 1},
+        {TTI::SK_Reverse, MVT::nxv4i32, 1},
+        {TTI::SK_Reverse, MVT::nxv2i64, 1},
+        {TTI::SK_Reverse, MVT::nxv2f16, 1},
+        {TTI::SK_Reverse, MVT::nxv4f16, 1},
+        {TTI::SK_Reverse, MVT::nxv8f16, 1},
+        {TTI::SK_Reverse, MVT::nxv2bf16, 1},
+        {TTI::SK_Reverse, MVT::nxv4bf16, 1},
+        {TTI::SK_Reverse, MVT::nxv8bf16, 1},
+        {TTI::SK_Reverse, MVT::nxv2f32, 1},
+        {TTI::SK_Reverse, MVT::nxv4f32, 1},
+        {TTI::SK_Reverse, MVT::nxv2f64, 1},
+        {TTI::SK_Reverse, MVT::nxv16i1, 1},
+        {TTI::SK_Reverse, MVT::nxv8i1, 1},
+        {TTI::SK_Reverse, MVT::nxv4i1, 1},
+        {TTI::SK_Reverse, MVT::nxv2i1, 1},
     };
     if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
       return LT.first * Entry->Cost;

diff  --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
index 7fbed2d37e833..0eee19ad2adb1 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
@@ -30,6 +30,24 @@ define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
   ret <8 x i16> %res
 }
 
+define <2 x i16> @reverse_v2i16(<2 x i16> %a) #0 {
+; CHECK-LABEL: reverse_v2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev64 v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.experimental.vector.reverse.v2i16(<2 x i16> %a)
+  ret <2 x i16> %res
+}
+
+define <2 x i32> @reverse_v2i32(<2 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev64 v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %res = call <2 x i32> @llvm.experimental.vector.reverse.v2i32(<2 x i32> %a)
+  ret <2 x i32> %res
+}
+
 define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
 ; CHECK-LABEL: reverse_v4i32:
 ; CHECK:       // %bb.0:
@@ -62,6 +80,15 @@ define <8 x half> @reverse_v8f16(<8 x half> %a) #0 {
   ret <8 x half> %res
 }
 
+define <2 x float> @reverse_v2f32(<2 x float> %a) #0 {
+; CHECK-LABEL: reverse_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev64 v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %res = call <2 x float> @llvm.experimental.vector.reverse.v2f32(<2 x float> %a)
+  ret <2 x float> %res
+}
+
 define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
 ; CHECK-LABEL: reverse_v4f32:
 ; CHECK:       // %bb.0:
@@ -163,10 +190,13 @@ define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
 declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
 declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
 declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
+declare <2 x i16> @llvm.experimental.vector.reverse.v2i16(<2 x i16>)
+declare <2 x i32> @llvm.experimental.vector.reverse.v2i32(<2 x i32>)
 declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
 declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
 declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
 declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
+declare <2 x float> @llvm.experimental.vector.reverse.v2f32(<2 x float>)
 declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
 declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
 declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)


        


More information about the llvm-commits mailing list