[llvm] c0bf592 - [AArch64] Improve vector reverse lowering
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 22 13:01:39 PDT 2021
Author: David Green
Date: 2021-04-22T21:01:25+01:00
New Revision: c0bf5929eea7ee86d664f069c2cfb9c65fcda05c
URL: https://github.com/llvm/llvm-project/commit/c0bf5929eea7ee86d664f069c2cfb9c65fcda05c
DIFF: https://github.com/llvm/llvm-project/commit/c0bf5929eea7ee86d664f069c2cfb9c65fcda05c.diff
LOG: [AArch64] Improve vector reverse lowering
This improves the lowering of v8i16 and v16i8 vector reverse shuffles.
Instead of going via a generic tbl, it uses a rev64 + ext pair, as
already happens for v4i32.
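
For readers unfamiliar with the trick, here is a minimal standalone C++
sketch (an illustration only, not part of the patch) modelling the two
instructions at the lane level for the v8i16 case: rev64 reverses the
elements within each 64-bit half, and ext #8 then rotates the vector by
8 bytes, swapping the halves, so the composition is a full element
reverse. Lane numbering is assumed little-endian for the model.

// Standalone sketch: models rev64 v.8h followed by ext #8 on an
// 8 x 16-bit vector and checks the result is a full reverse.
#include <array>
#include <cassert>
#include <cstdint>

using V8i16 = std::array<uint16_t, 8>;

// Model of "rev64 v.8h": reverse the 4 lanes inside each 64-bit half.
static V8i16 rev64(const V8i16 &V) {
  V8i16 R{};
  for (int Half = 0; Half < 2; ++Half)
    for (int I = 0; I < 4; ++I)
      R[Half * 4 + I] = V[Half * 4 + (3 - I)];
  return R;
}

// Model of "ext v, v, v, #8": rotate the 16-byte vector by 8 bytes,
// i.e. swap the low and high 64-bit halves (4 lanes of 16 bits each).
static V8i16 ext8(const V8i16 &V) {
  V8i16 R{};
  for (int I = 0; I < 8; ++I)
    R[I] = V[(I + 4) % 8];
  return R;
}

int main() {
  V8i16 A = {0, 1, 2, 3, 4, 5, 6, 7};
  V8i16 R = ext8(rev64(A));
  // rev64 gives {3,2,1,0,7,6,5,4}; ext #8 swaps halves -> {7,6,5,4,3,2,1,0}.
  for (int I = 0; I < 8; ++I)
    assert(R[I] == A[7 - I]);
  return 0;
}

The same composition covers v16i8 (8 lanes per 64-bit half); v4i32
already used this rev64 + ext pattern before the patch.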
Differential Revision: https://reviews.llvm.org/D100882
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
llvm/test/CodeGen/AArch64/neon-reverseshuffle.patch
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 87a0361d1bc1..f9cba250a2af 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9007,6 +9007,10 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
+ assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
+ assert(ShuffleMask.size() == VT.getVectorNumElements() &&
+ "Unexpected VECTOR_SHUFFLE mask size!");
+
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is undef splat, generate it via "just" vdup, if possible.
@@ -9053,6 +9057,14 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (isREVMask(ShuffleMask, VT, 16))
return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
+ if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
+ (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
+ ShuffleVectorInst::isReverseMask(ShuffleMask)) {
+ SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
+ return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
+ DAG.getConstant(8, dl, MVT::i32));
+ }
+
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
index 8cdaf4e2dfda..0e6d19e451dc 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
@@ -9,28 +9,10 @@ target triple = "aarch64-unknown-linux-gnu"
;
define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
-; CHECK-LABEL: .LCPI0_0:
-; CHECK: .byte 15 // 0xf
-; CHECK-NEXT: .byte 14 // 0xe
-; CHECK-NEXT: .byte 13 // 0xd
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 11 // 0xb
-; CHECK-NEXT: .byte 10 // 0xa
-; CHECK-NEXT: .byte 9 // 0x9
-; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 7 // 0x7
-; CHECK-NEXT: .byte 6 // 0x6
-; CHECK-NEXT: .byte 5 // 0x5
-; CHECK-NEXT: .byte 4 // 0x4
-; CHECK-NEXT: .byte 3 // 0x3
-; CHECK-NEXT: .byte 2 // 0x2
-; CHECK-NEXT: .byte 1 // 0x1
-; CHECK-NEXT: .byte 0 // 0x0
; CHECK-LABEL: reverse_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: rev64 v0.16b, v0.16b
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
%res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
@@ -38,28 +20,10 @@ define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
}
define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
-; CHECK-LABEL: .LCPI1_0:
-; CHECK: .byte 14 // 0xe
-; CHECK-NEXT: .byte 15 // 0xf
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 13 // 0xd
-; CHECK-NEXT: .byte 10 // 0xa
-; CHECK-NEXT: .byte 11 // 0xb
-; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 9 // 0x9
-; CHECK-NEXT: .byte 6 // 0x6
-; CHECK-NEXT: .byte 7 // 0x7
-; CHECK-NEXT: .byte 4 // 0x4
-; CHECK-NEXT: .byte 5 // 0x5
-; CHECK-NEXT: .byte 2 // 0x2
-; CHECK-NEXT: .byte 3 // 0x3
-; CHECK-NEXT: .byte 0 // 0x0
-; CHECK-NEXT: .byte 1 // 0x1
; CHECK-LABEL: reverse_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
%res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
@@ -88,28 +52,10 @@ define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
}
define <8 x half> @reverse_v8f16(<8 x half> %a) #0 {
-; CHECK-LABEL: .LCPI4_0:
-; CHECK: .byte 14 // 0xe
-; CHECK-NEXT: .byte 15 // 0xf
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 13 // 0xd
-; CHECK-NEXT: .byte 10 // 0xa
-; CHECK-NEXT: .byte 11 // 0xb
-; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 9 // 0x9
-; CHECK-NEXT: .byte 6 // 0x6
-; CHECK-NEXT: .byte 7 // 0x7
-; CHECK-NEXT: .byte 4 // 0x4
-; CHECK-NEXT: .byte 5 // 0x5
-; CHECK-NEXT: .byte 2 // 0x2
-; CHECK-NEXT: .byte 3 // 0x3
-; CHECK-NEXT: .byte 0 // 0x0
-; CHECK-NEXT: .byte 1 // 0x1
; CHECK-LABEL: reverse_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
%res = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> %a)
diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.patch b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.patch
index 173dec742f4d..f470c0965e43 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.patch
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.patch
@@ -35,9 +35,8 @@ entry:
define <8 x i16> @v8i16(<8 x i16> %a) {
; CHECK-LABEL: v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -69,9 +68,8 @@ entry:
define <16 x i8> @v16i8(<16 x i8> %a) {
; CHECK-LABEL: v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: adrp x8, .LCPI6_0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: rev64 v0.16b, v0.16b
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
entry:
%V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -136,9 +134,8 @@ entry:
define <8 x half> @v8f16(<8 x half> %a) {
; CHECK-LABEL: v8f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: adrp x8, .LCPI12_0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>