[llvm] [AArch64][GlobalISel] Prefer DUPLANE to REV (PR #142725)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 00:00:54 PDT 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/142725
Some shuffles containing undefs can match multiple instructions, such as <3,u,u,u> being either a duplane or a rev. This changes the order that different shuffles are considered, so that duplane is preferred which is simpler and more likely to lead to further combines.
From d06a574998b43c81436491df0f82e649ba59f511 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 4 Jun 2025 07:57:48 +0100
Subject: [PATCH] [AArch64][GlobalISel] Prefer DUPLANE to REV
Some shuffles containing undefs can match multiple instructions, such as
<3,u,u,u> being either a duplane or a rev. This changes the order that
different shuffles are considered, so that duplane is preferred which is
simpler and more likely to lead to further combines.
---
llvm/lib/Target/AArch64/AArch64Combine.td | 4 +-
.../GlobalISel/postlegalizer-lowering-ext.mir | 7 +--
.../GlobalISel/postlegalizer-lowering-rev.mir | 7 ++-
.../AArch64/arm64-neon-add-pairwise.ll | 2 +-
llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll | 54 +++++++------------
llvm/test/CodeGen/AArch64/bitcast.ll | 4 +-
llvm/test/CodeGen/AArch64/dup.ll | 18 +++----
llvm/test/CodeGen/AArch64/shufflevector.ll | 3 +-
8 files changed, 42 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f84e83816bf33..940d18a17b244 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -172,8 +172,8 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;
-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
- form_duplane, shuf_to_ins]>;
+def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
+ uzp, trn, fullrev, shuf_to_ins]>;
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
def vector_unmerge_lowering : GICombineRule <
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
index 14d44d85e06f3..8dedb26dac2e1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
@@ -255,7 +255,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
- ; CHECK-NEXT: %shuf:_(<8 x s16>) = G_REV64 %v1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %shuf:_(<8 x s16>) = G_DUPLANE16 %v1, [[C]](s64)
; CHECK-NEXT: $q0 = COPY %shuf(<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<8 x s16>) = COPY $q0
@@ -298,8 +299,8 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_TRN2 %v1, %v2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<2 x s64>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
index c5a6030155494..1d24f8acfbc53 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
@@ -38,8 +38,11 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK-NEXT: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
- ; CHECK-NEXT: $d0 = COPY [[REV64_]](<2 x s32>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: [[DUPLANE32_:%[0-9]+]]:_(<2 x s32>) = G_DUPLANE32 [[CONCAT_VECTORS]], [[C]](s64)
+ ; CHECK-NEXT: $d0 = COPY [[DUPLANE32_]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index 17fb312c63754..0ede4bc7a4d6c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -139,7 +139,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
+; CHECK-GI-NEXT: dup v1.2s, v0.s[1]
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index bbea8f7b93f02..0b96bef656b03 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -677,23 +677,14 @@ entry:
}
define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
-; CHECK-SD-LABEL: test_vqrdmlahh_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: fmov s2, w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
-; CHECK-SD-NEXT: umov w0, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlahh_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
-; CHECK-GI-NEXT: umov w0, v1.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlahh_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
+; CHECK-NEXT: umov w0, v1.h[0]
+; CHECK-NEXT: ret
entry:
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -739,7 +730,7 @@ define i16 @test_vqrdmlahh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
;
; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
+; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: fmov s2, w1
; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
@@ -857,23 +848,14 @@ entry:
}
define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
-; CHECK-SD-LABEL: test_vqrdmlshh_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: fmov s2, w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
-; CHECK-SD-NEXT: umov w0, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlshh_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
-; CHECK-GI-NEXT: umov w0, v1.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlshh_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
+; CHECK-NEXT: umov w0, v1.h[0]
+; CHECK-NEXT: ret
entry:
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -919,7 +901,7 @@ define i16 @test_vqrdmlshh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
;
; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
+; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: fmov s2, w1
; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index d54cc4adb81b3..7163858b06652 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -15,7 +15,7 @@ define <4 x i16> @foo1(<2 x i32> %a) {
; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
; CHECK-GI-NEXT: mov v1.s[0], w8
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
; CHECK-GI-NEXT: ret
%1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
@@ -35,7 +35,7 @@ define <4 x i16> @foo2(<2 x i32> %a) {
; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
; CHECK-GI-NEXT: mov v1.s[0], w8
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
; CHECK-GI-NEXT: ret
%1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index bfc0ef0826f68..a3f6dec186df2 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -675,13 +675,12 @@ define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
;
; CHECK-GI-LABEL: duplane0_v3i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov d2, d0
; CHECK-GI-NEXT: ret
entry:
%c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
@@ -1517,13 +1516,12 @@ define <3 x double> @duplane0_v3double(<3 x double> %b) {
;
; CHECK-GI-LABEL: duplane0_v3double:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov d2, d0
; CHECK-GI-NEXT: ret
entry:
%c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index e5c07e0c81e35..c78965731ea42 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -406,8 +406,9 @@ define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
; CHECK-GI-NEXT: fmov x8, d5
; CHECK-GI-NEXT: mov v1.d[0], x8
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v2.16b, #8
-; CHECK-GI-NEXT: fmov x10, d1
+; CHECK-GI-NEXT: dup v1.2d, v1.d[0]
; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: fmov x10, d1
; CHECK-GI-NEXT: fmov d1, d2
; CHECK-GI-NEXT: fmov d2, x10
; CHECK-GI-NEXT: ret
More information about the llvm-commits
mailing list