[llvm] 01c8cd6 - [AArch64][GlobalISel] Full reverse shuffles. (#119083)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 29 07:56:16 PST 2024
Author: David Green
Date: 2024-12-29T15:56:12Z
New Revision: 01c8cd664a9bea23a49c863a39351949ac11a4fd
URL: https://github.com/llvm/llvm-project/commit/01c8cd664a9bea23a49c863a39351949ac11a4fd
DIFF: https://github.com/llvm/llvm-project/commit/01c8cd664a9bea23a49c863a39351949ac11a4fd.diff
LOG: [AArch64][GlobalISel] Full reverse shuffles. (#119083)
A full shuffle reverse needs to use EXT+REV64. This adds handling for more types than SDAG so long as the mask is isReverseMask to make the patterns simpler.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1b1d81fcd07a2b..ce1980697abbbb 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -131,6 +131,15 @@ def ext: GICombineRule <
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
>;
+def fullrev: GICombineRule <
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (G_IMPLICIT_DEF $src2),
+ (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root,
+ [{ return ShuffleVectorInst::isReverseMask(${mask}.getShuffleMask(),
+ ${mask}.getShuffleMask().size()); }]),
+ (apply [{ applyFullRev(*${root}, MRI); }])
+>;
+
def insertelt_nonconst: GICombineRule <
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
@@ -163,7 +172,7 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;
-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
+def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
form_duplane, shuf_to_ins]>;
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 5fe2e3cefa1125..6bba70d45a61df 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -405,6 +405,19 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MI.eraseFromParent();
}
+void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ assert(DstTy.getSizeInBits() == 128 &&
+ "Expected 128bit vector in applyFullRev");
+ MachineIRBuilder MIRBuilder(MI);
+ auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
+ auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
+ MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
+ MI.eraseFromParent();
+}
+
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index ee9fff7ceebc6e..f0c9dccb21d84a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -440,11 +440,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
;
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI14_0
-; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
@@ -493,11 +492,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI16_0
-; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index abdfb996fa1668..db5b93282e9c43 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -23,19 +23,11 @@ entry:
}
define <4 x i32> @v4i32(<4 x i32> %a) {
-; CHECK-SD-LABEL: v4i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.4s, v0.4s
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v4i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI2_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.4s, v0.4s
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %V128
@@ -52,19 +44,11 @@ entry:
}
define <8 x i16> @v8i16(<8 x i16> %a) {
-; CHECK-SD-LABEL: v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI4_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i16> %V128
@@ -93,6 +77,22 @@ entry:
ret <8 x i16> %V128
}
+define <4 x i16> @v8i16_3(<8 x i16> %a) {
+; CHECK-SD-LABEL: v8i16_3:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: rev64 v0.4h, v0.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i16_3:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %V128 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i16> %V128
+}
+
define <4 x i16> @v4i16(<4 x i16> %a) {
; CHECK-LABEL: v4i16:
; CHECK: // %bb.0: // %entry
@@ -104,19 +104,11 @@ entry:
}
define <16 x i8> @v16i8(<16 x i8> %a) {
-; CHECK-SD-LABEL: v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.16b, v0.16b
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.16b, v0.16b
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <16 x i8> %V128
@@ -125,18 +117,18 @@ entry:
define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-LABEL: v16i8_2:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: adrp x8, .LCPI8_0
+; CHECK-SD-NEXT: adrp x8, .LCPI9_0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v16i8_2:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI8_0
+; CHECK-GI-NEXT: adrp x8, .LCPI9_0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
@@ -166,19 +158,11 @@ entry:
}
define <4 x float> @v4f32(<4 x float> %a) {
-; CHECK-SD-LABEL: v4f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.4s, v0.4s
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v4f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI11_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.4s, v0.4s
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %V128
@@ -195,19 +179,11 @@ entry:
}
define <8 x half> @v8f16(<8 x half> %a) {
-; CHECK-SD-LABEL: v8f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x half> %V128
More information about the llvm-commits
mailing list