[llvm] 723245b - [AARCH64][COST] Improve cost of reverse shuffles for AArch64
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 8 03:56:19 PDT 2022
Author: liqinweng
Date: 2022-09-08T18:55:49+08:00
New Revision: 723245bfac1ad0412ecdb012f284d1b920373cdb
URL: https://github.com/llvm/llvm-project/commit/723245bfac1ad0412ecdb012f284d1b920373cdb
DIFF: https://github.com/llvm/llvm-project/commit/723245bfac1ad0412ecdb012f284d1b920373cdb.diff
LOG: [AARCH64][COST] Improve cost of reverse shuffles for AArch64
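Update the comments for reverse shuffles in the shuffle cost table (v2i32/v2f32 are lowered with REV64 and v2i64/v2f64 with EXT rather than mov; the cost values themselves are unchanged, and the table is reformatted) and add NEON codegen tests for reversing v2i16, v2i32 and v2f32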
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D132730
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6bdd89d7fa8b6..5b8e236c77c1f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2929,116 +2929,117 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
Kind == TTI::SK_Reverse || Kind == TTI::SK_Splice) {
static const CostTblEntry ShuffleTbl[] = {
- // Broadcast shuffle kinds can be performed with 'dup'.
- { TTI::SK_Broadcast, MVT::v8i8, 1 },
- { TTI::SK_Broadcast, MVT::v16i8, 1 },
- { TTI::SK_Broadcast, MVT::v4i16, 1 },
- { TTI::SK_Broadcast, MVT::v8i16, 1 },
- { TTI::SK_Broadcast, MVT::v2i32, 1 },
- { TTI::SK_Broadcast, MVT::v4i32, 1 },
- { TTI::SK_Broadcast, MVT::v2i64, 1 },
- { TTI::SK_Broadcast, MVT::v2f32, 1 },
- { TTI::SK_Broadcast, MVT::v4f32, 1 },
- { TTI::SK_Broadcast, MVT::v2f64, 1 },
- // Transpose shuffle kinds can be performed with 'trn1/trn2' and
- // 'zip1/zip2' instructions.
- { TTI::SK_Transpose, MVT::v8i8, 1 },
- { TTI::SK_Transpose, MVT::v16i8, 1 },
- { TTI::SK_Transpose, MVT::v4i16, 1 },
- { TTI::SK_Transpose, MVT::v8i16, 1 },
- { TTI::SK_Transpose, MVT::v2i32, 1 },
- { TTI::SK_Transpose, MVT::v4i32, 1 },
- { TTI::SK_Transpose, MVT::v2i64, 1 },
- { TTI::SK_Transpose, MVT::v2f32, 1 },
- { TTI::SK_Transpose, MVT::v4f32, 1 },
- { TTI::SK_Transpose, MVT::v2f64, 1 },
- // Select shuffle kinds.
- // TODO: handle vXi8/vXi16.
- { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
- { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
- { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
- { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
- { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
- { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
- // PermuteSingleSrc shuffle kinds.
- { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
- { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
- { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
- { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
- { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
- { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
- { TTI::SK_PermuteSingleSrc, MVT::v4i16, 3 }, // perfectshuffle worst case.
- { TTI::SK_PermuteSingleSrc, MVT::v4f16, 3 }, // perfectshuffle worst case.
- { TTI::SK_PermuteSingleSrc, MVT::v4bf16, 3 }, // perfectshuffle worst case.
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 8 }, // constpool + load + tbl
- { TTI::SK_PermuteSingleSrc, MVT::v8f16, 8 }, // constpool + load + tbl
- { TTI::SK_PermuteSingleSrc, MVT::v8bf16, 8 }, // constpool + load + tbl
- { TTI::SK_PermuteSingleSrc, MVT::v8i8, 8 }, // constpool + load + tbl
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 8 }, // constpool + load + tbl
- // Reverse can be lowered with `rev`.
- { TTI::SK_Reverse, MVT::v2i32, 1 }, // mov.
- { TTI::SK_Reverse, MVT::v4i32, 2 }, // REV64; EXT
- { TTI::SK_Reverse, MVT::v2i64, 1 }, // mov.
- { TTI::SK_Reverse, MVT::v2f32, 1 }, // mov.
- { TTI::SK_Reverse, MVT::v4f32, 2 }, // REV64; EXT
- { TTI::SK_Reverse, MVT::v2f64, 1 }, // mov.
- { TTI::SK_Reverse, MVT::v8f16, 2 }, // REV64; EXT
- { TTI::SK_Reverse, MVT::v8i16, 2 }, // REV64; EXT
- { TTI::SK_Reverse, MVT::v16i8, 2 }, // REV64; EXT
- { TTI::SK_Reverse, MVT::v4f16, 1 }, // REV64
- { TTI::SK_Reverse, MVT::v4i16, 1 }, // REV64
- { TTI::SK_Reverse, MVT::v8i8, 1 }, // REV64
- // Splice can all be lowered as `ext`.
- { TTI::SK_Splice, MVT::v2i32, 1 },
- { TTI::SK_Splice, MVT::v4i32, 1 },
- { TTI::SK_Splice, MVT::v2i64, 1 },
- { TTI::SK_Splice, MVT::v2f32, 1 },
- { TTI::SK_Splice, MVT::v4f32, 1 },
- { TTI::SK_Splice, MVT::v2f64, 1 },
- { TTI::SK_Splice, MVT::v8f16, 1 },
- { TTI::SK_Splice, MVT::v8bf16, 1 },
- { TTI::SK_Splice, MVT::v8i16, 1 },
- { TTI::SK_Splice, MVT::v16i8, 1 },
- { TTI::SK_Splice, MVT::v4bf16, 1 },
- { TTI::SK_Splice, MVT::v4f16, 1 },
- { TTI::SK_Splice, MVT::v4i16, 1 },
- { TTI::SK_Splice, MVT::v8i8, 1 },
- // Broadcast shuffle kinds for scalable vectors
- { TTI::SK_Broadcast, MVT::nxv16i8, 1 },
- { TTI::SK_Broadcast, MVT::nxv8i16, 1 },
- { TTI::SK_Broadcast, MVT::nxv4i32, 1 },
- { TTI::SK_Broadcast, MVT::nxv2i64, 1 },
- { TTI::SK_Broadcast, MVT::nxv2f16, 1 },
- { TTI::SK_Broadcast, MVT::nxv4f16, 1 },
- { TTI::SK_Broadcast, MVT::nxv8f16, 1 },
- { TTI::SK_Broadcast, MVT::nxv2bf16, 1 },
- { TTI::SK_Broadcast, MVT::nxv4bf16, 1 },
- { TTI::SK_Broadcast, MVT::nxv8bf16, 1 },
- { TTI::SK_Broadcast, MVT::nxv2f32, 1 },
- { TTI::SK_Broadcast, MVT::nxv4f32, 1 },
- { TTI::SK_Broadcast, MVT::nxv2f64, 1 },
- { TTI::SK_Broadcast, MVT::nxv16i1, 1 },
- { TTI::SK_Broadcast, MVT::nxv8i1, 1 },
- { TTI::SK_Broadcast, MVT::nxv4i1, 1 },
- { TTI::SK_Broadcast, MVT::nxv2i1, 1 },
- // Handle the cases for vector.reverse with scalable vectors
- { TTI::SK_Reverse, MVT::nxv16i8, 1 },
- { TTI::SK_Reverse, MVT::nxv8i16, 1 },
- { TTI::SK_Reverse, MVT::nxv4i32, 1 },
- { TTI::SK_Reverse, MVT::nxv2i64, 1 },
- { TTI::SK_Reverse, MVT::nxv2f16, 1 },
- { TTI::SK_Reverse, MVT::nxv4f16, 1 },
- { TTI::SK_Reverse, MVT::nxv8f16, 1 },
- { TTI::SK_Reverse, MVT::nxv2bf16, 1 },
- { TTI::SK_Reverse, MVT::nxv4bf16, 1 },
- { TTI::SK_Reverse, MVT::nxv8bf16, 1 },
- { TTI::SK_Reverse, MVT::nxv2f32, 1 },
- { TTI::SK_Reverse, MVT::nxv4f32, 1 },
- { TTI::SK_Reverse, MVT::nxv2f64, 1 },
- { TTI::SK_Reverse, MVT::nxv16i1, 1 },
- { TTI::SK_Reverse, MVT::nxv8i1, 1 },
- { TTI::SK_Reverse, MVT::nxv4i1, 1 },
- { TTI::SK_Reverse, MVT::nxv2i1, 1 },
+ // Broadcast shuffle kinds can be performed with 'dup'.
+ {TTI::SK_Broadcast, MVT::v8i8, 1},
+ {TTI::SK_Broadcast, MVT::v16i8, 1},
+ {TTI::SK_Broadcast, MVT::v4i16, 1},
+ {TTI::SK_Broadcast, MVT::v8i16, 1},
+ {TTI::SK_Broadcast, MVT::v2i32, 1},
+ {TTI::SK_Broadcast, MVT::v4i32, 1},
+ {TTI::SK_Broadcast, MVT::v2i64, 1},
+ {TTI::SK_Broadcast, MVT::v2f32, 1},
+ {TTI::SK_Broadcast, MVT::v4f32, 1},
+ {TTI::SK_Broadcast, MVT::v2f64, 1},
+ // Transpose shuffle kinds can be performed with 'trn1/trn2' and
+ // 'zip1/zip2' instructions.
+ {TTI::SK_Transpose, MVT::v8i8, 1},
+ {TTI::SK_Transpose, MVT::v16i8, 1},
+ {TTI::SK_Transpose, MVT::v4i16, 1},
+ {TTI::SK_Transpose, MVT::v8i16, 1},
+ {TTI::SK_Transpose, MVT::v2i32, 1},
+ {TTI::SK_Transpose, MVT::v4i32, 1},
+ {TTI::SK_Transpose, MVT::v2i64, 1},
+ {TTI::SK_Transpose, MVT::v2f32, 1},
+ {TTI::SK_Transpose, MVT::v4f32, 1},
+ {TTI::SK_Transpose, MVT::v2f64, 1},
+ // Select shuffle kinds.
+ // TODO: handle vXi8/vXi16.
+ {TTI::SK_Select, MVT::v2i32, 1}, // mov.
+ {TTI::SK_Select, MVT::v4i32, 2}, // rev+trn (or similar).
+ {TTI::SK_Select, MVT::v2i64, 1}, // mov.
+ {TTI::SK_Select, MVT::v2f32, 1}, // mov.
+ {TTI::SK_Select, MVT::v4f32, 2}, // rev+trn (or similar).
+ {TTI::SK_Select, MVT::v2f64, 1}, // mov.
+ // PermuteSingleSrc shuffle kinds.
+ {TTI::SK_PermuteSingleSrc, MVT::v2i32, 1}, // mov.
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, 3}, // perfectshuffle worst case.
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // mov.
+ {TTI::SK_PermuteSingleSrc, MVT::v2f32, 1}, // mov.
+ {TTI::SK_PermuteSingleSrc, MVT::v4f32, 3}, // perfectshuffle worst case.
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // mov.
+ {TTI::SK_PermuteSingleSrc, MVT::v4i16, 3}, // perfectshuffle worst case.
+ {TTI::SK_PermuteSingleSrc, MVT::v4f16, 3}, // perfectshuffle worst case.
+ {TTI::SK_PermuteSingleSrc, MVT::v4bf16,
+ 3}, // perfectshuffle worst case.
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 8}, // constpool + load + tbl
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, 8}, // constpool + load + tbl
+ {TTI::SK_PermuteSingleSrc, MVT::v8bf16, 8}, // constpool + load + tbl
+ {TTI::SK_PermuteSingleSrc, MVT::v8i8, 8}, // constpool + load + tbl
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, 8}, // constpool + load + tbl
+ // Reverse can be lowered with `rev`.
+ {TTI::SK_Reverse, MVT::v2i32, 1}, // REV64
+ {TTI::SK_Reverse, MVT::v4i32, 2}, // REV64; EXT
+ {TTI::SK_Reverse, MVT::v2i64, 1}, // EXT
+ {TTI::SK_Reverse, MVT::v2f32, 1}, // REV64
+ {TTI::SK_Reverse, MVT::v4f32, 2}, // REV64; EXT
+ {TTI::SK_Reverse, MVT::v2f64, 1}, // EXT
+ {TTI::SK_Reverse, MVT::v8f16, 2}, // REV64; EXT
+ {TTI::SK_Reverse, MVT::v8i16, 2}, // REV64; EXT
+ {TTI::SK_Reverse, MVT::v16i8, 2}, // REV64; EXT
+ {TTI::SK_Reverse, MVT::v4f16, 1}, // REV64
+ {TTI::SK_Reverse, MVT::v4i16, 1}, // REV64
+ {TTI::SK_Reverse, MVT::v8i8, 1}, // REV64
+ // Splice can all be lowered as `ext`.
+ {TTI::SK_Splice, MVT::v2i32, 1},
+ {TTI::SK_Splice, MVT::v4i32, 1},
+ {TTI::SK_Splice, MVT::v2i64, 1},
+ {TTI::SK_Splice, MVT::v2f32, 1},
+ {TTI::SK_Splice, MVT::v4f32, 1},
+ {TTI::SK_Splice, MVT::v2f64, 1},
+ {TTI::SK_Splice, MVT::v8f16, 1},
+ {TTI::SK_Splice, MVT::v8bf16, 1},
+ {TTI::SK_Splice, MVT::v8i16, 1},
+ {TTI::SK_Splice, MVT::v16i8, 1},
+ {TTI::SK_Splice, MVT::v4bf16, 1},
+ {TTI::SK_Splice, MVT::v4f16, 1},
+ {TTI::SK_Splice, MVT::v4i16, 1},
+ {TTI::SK_Splice, MVT::v8i8, 1},
+ // Broadcast shuffle kinds for scalable vectors
+ {TTI::SK_Broadcast, MVT::nxv16i8, 1},
+ {TTI::SK_Broadcast, MVT::nxv8i16, 1},
+ {TTI::SK_Broadcast, MVT::nxv4i32, 1},
+ {TTI::SK_Broadcast, MVT::nxv2i64, 1},
+ {TTI::SK_Broadcast, MVT::nxv2f16, 1},
+ {TTI::SK_Broadcast, MVT::nxv4f16, 1},
+ {TTI::SK_Broadcast, MVT::nxv8f16, 1},
+ {TTI::SK_Broadcast, MVT::nxv2bf16, 1},
+ {TTI::SK_Broadcast, MVT::nxv4bf16, 1},
+ {TTI::SK_Broadcast, MVT::nxv8bf16, 1},
+ {TTI::SK_Broadcast, MVT::nxv2f32, 1},
+ {TTI::SK_Broadcast, MVT::nxv4f32, 1},
+ {TTI::SK_Broadcast, MVT::nxv2f64, 1},
+ {TTI::SK_Broadcast, MVT::nxv16i1, 1},
+ {TTI::SK_Broadcast, MVT::nxv8i1, 1},
+ {TTI::SK_Broadcast, MVT::nxv4i1, 1},
+ {TTI::SK_Broadcast, MVT::nxv2i1, 1},
+ // Handle the cases for vector.reverse with scalable vectors
+ {TTI::SK_Reverse, MVT::nxv16i8, 1},
+ {TTI::SK_Reverse, MVT::nxv8i16, 1},
+ {TTI::SK_Reverse, MVT::nxv4i32, 1},
+ {TTI::SK_Reverse, MVT::nxv2i64, 1},
+ {TTI::SK_Reverse, MVT::nxv2f16, 1},
+ {TTI::SK_Reverse, MVT::nxv4f16, 1},
+ {TTI::SK_Reverse, MVT::nxv8f16, 1},
+ {TTI::SK_Reverse, MVT::nxv2bf16, 1},
+ {TTI::SK_Reverse, MVT::nxv4bf16, 1},
+ {TTI::SK_Reverse, MVT::nxv8bf16, 1},
+ {TTI::SK_Reverse, MVT::nxv2f32, 1},
+ {TTI::SK_Reverse, MVT::nxv4f32, 1},
+ {TTI::SK_Reverse, MVT::nxv2f64, 1},
+ {TTI::SK_Reverse, MVT::nxv16i1, 1},
+ {TTI::SK_Reverse, MVT::nxv8i1, 1},
+ {TTI::SK_Reverse, MVT::nxv4i1, 1},
+ {TTI::SK_Reverse, MVT::nxv2i1, 1},
};
if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
index 7fbed2d37e833..0eee19ad2adb1 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
@@ -30,6 +30,24 @@ define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
ret <8 x i16> %res
}
+define <2 x i16> @reverse_v2i16(<2 x i16> %a) #0 {
+; CHECK-LABEL: reverse_v2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %res = call <2 x i16> @llvm.experimental.vector.reverse.v2i16(<2 x i16> %a)
+ ret <2 x i16> %res
+}
+
+define <2 x i32> @reverse_v2i32(<2 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %res = call <2 x i32> @llvm.experimental.vector.reverse.v2i32(<2 x i32> %a)
+ ret <2 x i32> %res
+}
+
define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
; CHECK-LABEL: reverse_v4i32:
; CHECK: // %bb.0:
@@ -62,6 +80,15 @@ define <8 x half> @reverse_v8f16(<8 x half> %a) #0 {
ret <8 x half> %res
}
+define <2 x float> @reverse_v2f32(<2 x float> %a) #0 {
+; CHECK-LABEL: reverse_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %res = call <2 x float> @llvm.experimental.vector.reverse.v2f32(<2 x float> %a)
+ ret <2 x float> %res
+}
+
define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
; CHECK-LABEL: reverse_v4f32:
; CHECK: // %bb.0:
@@ -163,10 +190,13 @@ define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
+declare <2 x i16> @llvm.experimental.vector.reverse.v2i16(<2 x i16>)
+declare <2 x i32> @llvm.experimental.vector.reverse.v2i32(<2 x i32>)
declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
+declare <2 x float> @llvm.experimental.vector.reverse.v2f32(<2 x float>)
declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
More information about the llvm-commits
mailing list