[llvm] [AArch64] Full reverse shuffles. (PR #119083)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 13:19:15 PST 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/119083
>From eb4bc5e981f8c471579cb41cc9cad2a24d28058c Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 16 Dec 2024 21:18:33 +0000
Subject: [PATCH] [AArch64] Full reverse shuffles.
A full shuffle reverse needs to use EXT+REV64. This adds handling for v8s16 and
v16s8 types to match SDAG. Other types should be handled by perfect shuffles.
---
llvm/lib/Target/AArch64/AArch64Combine.td | 11 +-
.../GISel/AArch64PostLegalizerLowering.cpp | 13 ++
llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll | 14 +--
.../CodeGen/AArch64/neon-reverseshuffle.ll | 114 +++++++-----------
4 files changed, 74 insertions(+), 78 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1b1d81fcd07a2b..ce1980697abbbb 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -131,6 +131,15 @@ def ext: GICombineRule <
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
>;
+def fullrev: GICombineRule <
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (G_IMPLICIT_DEF $src2),
+ (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root,
+ [{ return ShuffleVectorInst::isReverseMask(${mask}.getShuffleMask(),
+ ${mask}.getShuffleMask().size()); }]),
+ (apply [{ applyFullRev(*${root}, MRI); }])
+>;
+
def insertelt_nonconst: GICombineRule <
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
@@ -163,7 +172,7 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;
-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
+def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
form_duplane, shuf_to_ins]>;
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 56d70ffdece713..8130ea4b8902f0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -405,6 +405,19 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MI.eraseFromParent();
}
+void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ assert(DstTy.getSizeInBits() == 128 &&
+ "Expected 128bit vector in applyFullRev");
+ MachineIRBuilder MIRBuilder(MI);
+ auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
+ auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
+ MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
+ MI.eraseFromParent();
+}
+
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index ee9fff7ceebc6e..f0c9dccb21d84a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -440,11 +440,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
;
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI14_0
-; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
@@ -493,11 +492,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI16_0
-; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index abdfb996fa1668..db5b93282e9c43 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -23,19 +23,11 @@ entry:
}
define <4 x i32> @v4i32(<4 x i32> %a) {
-; CHECK-SD-LABEL: v4i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.4s, v0.4s
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v4i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI2_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.4s, v0.4s
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %V128
@@ -52,19 +44,11 @@ entry:
}
define <8 x i16> @v8i16(<8 x i16> %a) {
-; CHECK-SD-LABEL: v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI4_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i16> %V128
@@ -93,6 +77,22 @@ entry:
ret <8 x i16> %V128
}
+define <4 x i16> @v8i16_3(<8 x i16> %a) {
+; CHECK-SD-LABEL: v8i16_3:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: rev64 v0.4h, v0.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i16_3:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %V128 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i16> %V128
+}
+
define <4 x i16> @v4i16(<4 x i16> %a) {
; CHECK-LABEL: v4i16:
; CHECK: // %bb.0: // %entry
@@ -104,19 +104,11 @@ entry:
}
define <16 x i8> @v16i8(<16 x i8> %a) {
-; CHECK-SD-LABEL: v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.16b, v0.16b
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.16b, v0.16b
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <16 x i8> %V128
@@ -125,18 +117,18 @@ entry:
define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-LABEL: v16i8_2:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: adrp x8, .LCPI8_0
+; CHECK-SD-NEXT: adrp x8, .LCPI9_0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v16i8_2:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI8_0
+; CHECK-GI-NEXT: adrp x8, .LCPI9_0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
@@ -166,19 +158,11 @@ entry:
}
define <4 x float> @v4f32(<4 x float> %a) {
-; CHECK-SD-LABEL: v4f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.4s, v0.4s
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v4f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI11_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.4s, v0.4s
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %V128
@@ -195,19 +179,11 @@ entry:
}
define <8 x half> @v8f16(<8 x half> %a) {
-; CHECK-SD-LABEL: v8f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x half> %V128
More information about the llvm-commits
mailing list