[llvm] [AArch64][GlobalISel] Prefer DUPLANE to REV (PR #142725)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 8 23:45:19 PDT 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/142725
From 84f0b5609eb349ea4cee18a13d09b24690a3d214 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 9 Jun 2025 07:39:41 +0100
Subject: [PATCH] [AArch64][GlobalISel] Prefer DUPLANE to REV
Some shuffles containing undefs can match multiple instructions, such as
<3,u,u,u> being either a duplane or a rev. This changes the order in which
the different shuffles are considered, so that duplane is preferred, as it
is simpler and more likely to lead to further combines.
---
llvm/lib/Target/AArch64/AArch64Combine.td | 4 +-
.../GlobalISel/postlegalizer-lowering-ext.mir | 7 +--
.../GlobalISel/postlegalizer-lowering-rev.mir | 7 ++-
.../AArch64/arm64-neon-add-pairwise.ll | 2 +-
llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll | 54 +++++++------------
llvm/test/CodeGen/AArch64/bitcast.ll | 4 +-
llvm/test/CodeGen/AArch64/dup.ll | 18 +++----
llvm/test/CodeGen/AArch64/shufflevector.ll | 3 +-
8 files changed, 42 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f84e83816bf33..940d18a17b244 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -172,8 +172,8 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;
-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
- form_duplane, shuf_to_ins]>;
+def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
+ uzp, trn, fullrev, shuf_to_ins]>;
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
def vector_unmerge_lowering : GICombineRule <
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
index 14d44d85e06f3..8dedb26dac2e1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
@@ -255,7 +255,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
- ; CHECK-NEXT: %shuf:_(<8 x s16>) = G_REV64 %v1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %shuf:_(<8 x s16>) = G_DUPLANE16 %v1, [[C]](s64)
; CHECK-NEXT: $q0 = COPY %shuf(<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<8 x s16>) = COPY $q0
@@ -298,8 +299,8 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_TRN2 %v1, %v2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<2 x s64>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
index c5a6030155494..1d24f8acfbc53 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
@@ -38,8 +38,11 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK-NEXT: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
- ; CHECK-NEXT: $d0 = COPY [[REV64_]](<2 x s32>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: [[DUPLANE32_:%[0-9]+]]:_(<2 x s32>) = G_DUPLANE32 [[CONCAT_VECTORS]], [[C]](s64)
+ ; CHECK-NEXT: $d0 = COPY [[DUPLANE32_]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index 17fb312c63754..0ede4bc7a4d6c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -139,7 +139,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
+; CHECK-GI-NEXT: dup v1.2s, v0.s[1]
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index 7b439dd36c425..bb97ba6d92651 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -657,23 +657,14 @@ entry:
}
define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
-; CHECK-SD-LABEL: test_vqrdmlahh_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: fmov s2, w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
-; CHECK-SD-NEXT: umov w0, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlahh_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
-; CHECK-GI-NEXT: umov w0, v1.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlahh_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
+; CHECK-NEXT: umov w0, v1.h[0]
+; CHECK-NEXT: ret
entry:
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -719,7 +710,7 @@ define i16 @test_vqrdmlahh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
;
; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
+; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: fmov s2, w1
; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
@@ -837,23 +828,14 @@ entry:
}
define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
-; CHECK-SD-LABEL: test_vqrdmlshh_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: fmov s2, w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
-; CHECK-SD-NEXT: umov w0, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlshh_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
-; CHECK-GI-NEXT: umov w0, v1.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlshh_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
+; CHECK-NEXT: umov w0, v1.h[0]
+; CHECK-NEXT: ret
entry:
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -899,7 +881,7 @@ define i16 @test_vqrdmlshh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
;
; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
+; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: fmov s2, w1
; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index 38cec0d71a945..d2f72ecacc86c 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -15,7 +15,7 @@ define <4 x i16> @foo1(<2 x i32> %a) {
; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
; CHECK-GI-NEXT: ret
%1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
@@ -35,7 +35,7 @@ define <4 x i16> @foo2(<2 x i32> %a) {
; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
; CHECK-GI-NEXT: ret
%1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 26e070f2a0acd..079ff1076b110 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -1025,13 +1025,12 @@ define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
;
; CHECK-GI-LABEL: duplane0_v3i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov d2, d0
; CHECK-GI-NEXT: ret
entry:
%c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
@@ -2354,13 +2353,12 @@ define <3 x double> @duplane0_v3double(<3 x double> %b) {
;
; CHECK-GI-LABEL: duplane0_v3double:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov d2, d0
; CHECK-GI-NEXT: ret
entry:
%c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index 59cc400e8ac73..9fd5e65086782 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -399,7 +399,8 @@ define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
; CHECK-GI-NEXT: fmov x9, d4
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT: fmov d2, d5
+; CHECK-GI-NEXT: // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT: dup v2.2d, v5.d[0]
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v3.d[1], x9
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v3.16b, #8
More information about the llvm-commits
mailing list