[llvm] [AArch64][GlobalISel] Add combine for build_vector(unmerge, unmerge, undef, undef) (PR #165539)
Ryan Cowan via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 7 02:24:04 PST 2025
https://github.com/HolyMolyCowMan updated https://github.com/llvm/llvm-project/pull/165539
>From 3afd4eba40468069f4ceb95f7470e4344e9fc5b1 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Wed, 29 Oct 2025 10:26:54 +0000
Subject: [PATCH 1/9] [AArch64][GlobalISel] Add combine for
build_vector(unmerge, unmerge, undef, undef)
---
llvm/lib/Target/AArch64/AArch64Combine.td | 9 +-
.../GISel/AArch64PostLegalizerCombiner.cpp | 96 ++++++++++++++++++-
llvm/test/CodeGen/AArch64/fptrunc.ll | 18 +---
llvm/test/CodeGen/AArch64/itofp.ll | 48 +++-------
4 files changed, 120 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 278314792bfb9..056e6145487d2 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -332,6 +332,13 @@ def combine_mul_cmlt : GICombineRule<
(apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
>;
+def combine_build_unmerge : GICombineRule<
+ (defs root:$root, register_matchinfo:$unmergeSrc),
+ (match (wip_match_opcode G_BUILD_VECTOR):$root,
+ [{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
+ (apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -366,7 +373,7 @@ def AArch64PostLegalizerCombiner
select_to_minmax, or_to_bsp, combine_concat_vector,
commute_constant_to_rhs, extract_vec_elt_combines,
push_freeze_to_prevent_poison_from_propagating,
- combine_mul_cmlt, combine_use_vector_truncate,
+ combine_mul_cmlt, combine_use_vector_truncate, combine_build_unmerge,
extmultomull, truncsat_combines, lshr_of_trunc_of_lshr,
funnel_shift_from_or_shift_constants_are_legal]> {
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index fa7bb6ecc35ee..2f17fd33559ee 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
+#include <set>
#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
@@ -133,6 +134,99 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
}
+// This pattern aims to match the following shape to avoid extra mov
+// instructions
+// G_BUILD_VECTOR(
+// G_UNMERGE_VALUES(src, 0)
+// G_UNMERGE_VALUES(src, 1)
+// G_IMPLICIT_DEF
+// G_IMPLICIT_DEF
+// )
+// ->
+// G_CONCAT_VECTORS(
+// undef
+// src
+// )
+bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &UnmergeSrc) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+
+ unsigned UnmergeInstrCount = 0;
+ unsigned UndefInstrCount = 0;
+
+ unsigned UnmergeEltCount = 0;
+ unsigned UnmergeEltSize = 0;
+
+ Register UnmergeSrcTemp;
+
+ std::set<int> KnownRegs;
+
+ for (auto Use : MI.all_uses()) {
+ auto *Def = getDefIgnoringCopies(Use.getReg(), MRI);
+
+ if (!Def) {
+ return false;
+ }
+
+ unsigned Opcode = Def->getOpcode();
+
+ switch (Opcode) {
+ default:
+ return false;
+ case TargetOpcode::G_IMPLICIT_DEF:
+ ++UndefInstrCount;
+ break;
+ case TargetOpcode::G_UNMERGE_VALUES:
+ ++UnmergeInstrCount;
+
+ UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits();
+ UnmergeEltCount = Def->getNumDefs();
+ if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
+ UnmergeEltSize * UnmergeEltCount != 128)) {
+ return false;
+ }
+
+ // G_UNMERGE_VALUES has exactly one source operand, so the last use we see is
+ for (auto UnmergeUse : Def->all_uses())
+ UnmergeSrcTemp = UnmergeUse.getReg();
+
+ // Track unique sources for the G_UNMERGE_VALUES
+ unsigned RegId = UnmergeSrcTemp.id();
+ if (KnownRegs.find(RegId) != KnownRegs.end())
+ continue;
+
+ KnownRegs.insert(RegId);
+
+ // We know the unmerge is a valid target now so store the register.
+ UnmergeSrc = UnmergeSrcTemp;
+
+ break;
+ }
+ }
+
+ // Only want to match patterns that pad half of a vector with undefined. We
+ // also want to ensure that these values come from a single unmerge and all
+ // unmerged values are consumed.
+ if (UndefInstrCount != UnmergeInstrCount ||
+ UnmergeEltCount != UnmergeInstrCount || KnownRegs.size() != 1) {
+ return false;
+ }
+
+ return true;
+}
+
+void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, Register &UnmergeSrc) {
+ assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
+ B.setInstrAndDebugLoc(MI);
+
+ Register UndefVec = MRI.createGenericVirtualRegister(MRI.getType(UnmergeSrc));
+ B.buildUndef(UndefVec);
+ B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
+
+ MI.eraseFromParent();
+}
+
bool matchAArch64MulConstCombine(
MachineInstr &MI, MachineRegisterInfo &MRI,
std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
@@ -890,4 +984,4 @@ namespace llvm {
FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
return new AArch64PostLegalizerCombiner(IsOptNone);
}
-} // end namespace llvm
+} // end namespace llvm
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
index 1f84c944d7c16..ce1a2fc48c2e7 100644
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -345,19 +345,11 @@ entry:
}
define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
-; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptrunc_v2f32_v2f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
entry:
%c = fptrunc <2 x float> %a to <2 x half>
ret <2 x half> %c
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index caf87a13f283b..6d168edf180a4 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i64> %a to <2 x half>
@@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i64> %a to <2 x half>
@@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s
-; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i32> %a to <2 x half>
@@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s
-; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i32> %a to <2 x half>
@@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i16> %a to <2 x half>
@@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i16> %a to <2 x half>
@@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
@@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:
>From 372742e8a80a52bbd5c4f779bc94266f4b0bbe4a Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Wed, 29 Oct 2025 11:15:21 +0000
Subject: [PATCH 2/9] Fix unnecessary copying from using auto.
---
.../lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 2f17fd33559ee..c768d4b7a9a1c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -161,7 +161,7 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
std::set<int> KnownRegs;
- for (auto Use : MI.all_uses()) {
+ for (auto &Use : MI.all_uses()) {
auto *Def = getDefIgnoringCopies(Use.getReg(), MRI);
if (!Def) {
@@ -187,7 +187,7 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
}
// Unmerge should only use one register so we can use the last one
- for (auto UnmergeUse : Def->all_uses())
+ for (auto &UnmergeUse : Def->all_uses())
UnmergeSrcTemp = UnmergeUse.getReg();
// Track unique sources for the G_UNMERGE_VALUES
>From a0376ab6ba0a31cd6f318fee1c8e210503dac715 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Thu, 30 Oct 2025 12:06:59 +0000
Subject: [PATCH 3/9] Remove wip_match_opcode and check operand order and
consecutiveness
---
llvm/lib/Target/AArch64/AArch64Combine.td | 2 +-
.../GISel/AArch64PostLegalizerCombiner.cpp | 36 +++++++++++++++----
2 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 056e6145487d2..5d6feeaa363e8 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -334,7 +334,7 @@ def combine_mul_cmlt : GICombineRule<
def combine_build_unmerge : GICombineRule<
(defs root:$root, register_matchinfo:$unmergeSrc),
- (match (wip_match_opcode G_BUILD_VECTOR):$root,
+ (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
[{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
(apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index c768d4b7a9a1c..33de0af5808e2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -144,20 +144,24 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
// )
// ->
// G_CONCAT_VECTORS(
+// src,
// undef
-// src
// )
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &UnmergeSrc) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- unsigned UnmergeInstrCount = 0;
+ unsigned UnmergeUseCount = 0;
unsigned UndefInstrCount = 0;
unsigned UnmergeEltCount = 0;
unsigned UnmergeEltSize = 0;
+ unsigned BuildOperandCount = MI.getNumOperands();
+ bool EncounteredUndef = false;
+
Register UnmergeSrcTemp;
+ MachineInstr *UnmergeInstr;
std::set<int> KnownRegs;
@@ -170,14 +174,21 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
unsigned Opcode = Def->getOpcode();
+ // Ensure that the unmerged values are consecutive and appear before the
+ // undefined values by checking we don't encounter an undef before we reach
+ // the halfway point
+ if (EncounteredUndef && UnmergeUseCount < BuildOperandCount / 2)
+ return false;
+
switch (Opcode) {
default:
return false;
case TargetOpcode::G_IMPLICIT_DEF:
++UndefInstrCount;
+ EncounteredUndef = true;
break;
case TargetOpcode::G_UNMERGE_VALUES:
- ++UnmergeInstrCount;
+ ++UnmergeUseCount;
UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits();
UnmergeEltCount = Def->getNumDefs();
@@ -197,8 +208,10 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
KnownRegs.insert(RegId);
- // We know the unmerge is a valid target now so store the register.
+ // We know the unmerge is a valid target now so store the register & the
+ // instruction.
UnmergeSrc = UnmergeSrcTemp;
+ UnmergeInstr = Def;
break;
}
@@ -207,11 +220,22 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
// Only want to match patterns that pad half of a vector with undefined. We
// also want to ensure that these values come from a single unmerge and all
// unmerged values are consumed.
- if (UndefInstrCount != UnmergeInstrCount ||
- UnmergeEltCount != UnmergeInstrCount || KnownRegs.size() != 1) {
+ if (UndefInstrCount != UnmergeUseCount ||
+ UnmergeEltCount != UnmergeUseCount || KnownRegs.size() != 1) {
return false;
}
+ // Check the operands of the unmerge are used in the same order they are
+ // defined. G_BUILD_VECTOR always defines 1 output, so we know the uses start
+ // from index 1.
+ for (unsigned OperandIndex = 0; OperandIndex < UnmergeUseCount;
+ ++OperandIndex) {
+ Register BuildReg = MI.getOperand(OperandIndex + 1).getReg();
+ Register UnmergeReg = UnmergeInstr->getOperand(OperandIndex).getReg();
+ if (BuildReg != UnmergeReg)
+ return false;
+ }
+
return true;
}
>From 3f8d7da31402bf0af864060f60442b1d51955d40 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 10:12:49 +0000
Subject: [PATCH 4/9] Simplify logic & check for legality of resulting
concatenation
---
.../GISel/AArch64PostLegalizerCombiner.cpp | 100 ++++++------------
1 file changed, 31 insertions(+), 69 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 33de0af5808e2..5d6b47806722d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -151,91 +151,53 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &UnmergeSrc) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- unsigned UnmergeUseCount = 0;
- unsigned UndefInstrCount = 0;
+ unsigned BuildUseCount = MI.getNumOperands() - 1;
- unsigned UnmergeEltCount = 0;
- unsigned UnmergeEltSize = 0;
-
- unsigned BuildOperandCount = MI.getNumOperands();
- bool EncounteredUndef = false;
+ if (BuildUseCount % 2 != 0)
+ return false;
- Register UnmergeSrcTemp;
- MachineInstr *UnmergeInstr;
+ unsigned HalfWayIndex = BuildUseCount / 2;
- std::set<int> KnownRegs;
+ // Check the first operand is an unmerge
+ auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ return false;
- for (auto &Use : MI.all_uses()) {
- auto *Def = getDefIgnoringCopies(Use.getReg(), MRI);
+ // Check that the resultant concat will be legal
+ auto UnmergeEltSize =
+ MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
+ auto UnmergeEltCount = MaybeUnmerge->getNumDefs();
- if (!Def) {
- return false;
- }
+ if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
+ UnmergeEltSize * UnmergeEltCount != 128))
+ return false;
- unsigned Opcode = Def->getOpcode();
+ // Check that all of the operands before the midpoint come from the same
+ // unmerge and are in the same order as they are used in the build_vector
+ for (unsigned I = 0; I < HalfWayIndex; ++I) {
+ auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg();
+ auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI);
- // Ensure that the unmerged instructions are consecutive and before the
- // undefined values by checking we don't encounter an undef before we reach
- // half way
- if (EncounteredUndef && UnmergeUseCount < BuildOperandCount / 2)
+ if (Unmerge != MaybeUnmerge)
return false;
- switch (Opcode) {
- default:
+ if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
return false;
- case TargetOpcode::G_IMPLICIT_DEF:
- ++UndefInstrCount;
- EncounteredUndef = true;
- break;
- case TargetOpcode::G_UNMERGE_VALUES:
- ++UnmergeUseCount;
-
- UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits();
- UnmergeEltCount = Def->getNumDefs();
- if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
- UnmergeEltSize * UnmergeEltCount != 128)) {
- return false;
- }
-
- // Unmerge should only use one register so we can use the last one
- for (auto &UnmergeUse : Def->all_uses())
- UnmergeSrcTemp = UnmergeUse.getReg();
-
- // Track unique sources for the G_UNMERGE_VALUES
- unsigned RegId = UnmergeSrcTemp.id();
- if (KnownRegs.find(RegId) != KnownRegs.end())
- continue;
-
- KnownRegs.insert(RegId);
-
- // We know the unmerge is a valid target now so store the register & the
- // instruction.
- UnmergeSrc = UnmergeSrcTemp;
- UnmergeInstr = Def;
-
- break;
- }
}
- // Only want to match patterns that pad half of a vector with undefined. We
- // also want to ensure that these values come from a single unmerge and all
- // unmerged values are consumed.
- if (UndefInstrCount != UnmergeUseCount ||
- UnmergeEltCount != UnmergeUseCount || KnownRegs.size() != 1) {
- return false;
- }
+ // Check that all of the operands after the mid point are undefs.
+ for (unsigned I = HalfWayIndex; I < BuildUseCount; ++I) {
+ auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);
- // Check the operands of the unmerge are used in the same order they are
- // defined G_BUILD_VECTOR always defines 1 output so we know the uses start
- // from index 1
- for (unsigned OperandIndex = 0; OperandIndex < UnmergeUseCount;
- ++OperandIndex) {
- Register BuildReg = MI.getOperand(OperandIndex + 1).getReg();
- Register UnmergeReg = UnmergeInstr->getOperand(OperandIndex).getReg();
- if (BuildReg != UnmergeReg)
+ if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
return false;
}
+ // Unmerge should only use one register so we can use the last one
+ for (auto &UnmergeUse :
+ getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses())
+ UnmergeSrc = UnmergeUse.getReg();
+
return true;
}
>From 69ba5ab26d6bad07c0e0c655d55733160ff7186f Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 10:13:58 +0000
Subject: [PATCH 5/9] Make this a non-backend specific optimisation & check all
unmerged values are used
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 ++
.../include/llvm/Target/GlobalISel/Combine.td | 12 ++-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 85 +++++++++++++++++++
llvm/lib/Target/AArch64/AArch64Combine.td | 9 +-
.../GISel/AArch64PostLegalizerCombiner.cpp | 79 -----------------
5 files changed, 103 insertions(+), 88 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 36cb90b1bc134..968bbbf778254 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -647,6 +647,12 @@ class CombinerHelper {
bool matchRotateOutOfRange(MachineInstr &MI) const;
void applyRotateOutOfRange(MachineInstr &MI) const;
+ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &UnmergeSrc) const;
+ void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ Register &UnmergeSrc) const;
+
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 119695e53c3cb..0ab2d9487a295 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule <
[{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>;
+// Transform build_vector(unmerge(src, 0), ... unmerge(src, n), undef, ..., undef)
+// => concat_vectors(src, undef)
+def combine_build_unmerge : GICombineRule<
+ (defs root:$root, register_matchinfo:$unmergeSrc),
+ (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
+ [{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
+ (apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
+>;
+
def merge_combines: GICombineGroup<[
unmerge_anyext_build_vector,
unmerge_merge,
@@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[
unmerge_dead_to_trunc,
unmerge_zext_to_zext,
merge_of_x_and_undef,
- merge_of_x_and_zero
+ merge_of_x_and_zero,
+ combine_build_unmerge
]>;
// Under certain conditions, transform:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9ace7d65413ad..b7ade264cfc64 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3463,6 +3463,91 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
isConstTrueVal(TLI, Cst, IsVector, IsFP);
}
+// This pattern aims to match the following shape to avoid extra mov
+// instructions
+// G_BUILD_VECTOR(
+// G_UNMERGE_VALUES(src, 0)
+// G_UNMERGE_VALUES(src, 1)
+// G_IMPLICIT_DEF
+// G_IMPLICIT_DEF
+// )
+// ->
+// G_CONCAT_VECTORS(
+// src,
+// undef
+// )
+bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ Register &UnmergeSrc) const {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+
+ unsigned BuildUseCount = MI.getNumOperands() - 1;
+
+ if (BuildUseCount % 2 != 0)
+ return false;
+
+ unsigned NumUnmerge = BuildUseCount / 2;
+
+ // Check the first operand is an unmerge
+ auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ return false;
+
+ // Check that the resultant concat will be legal
+ auto UnmergeEltSize =
+ MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
+ auto UnmergeEltCount = MaybeUnmerge->getNumDefs();
+
+ if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
+ UnmergeEltSize * UnmergeEltCount != 128))
+ return false;
+
+ // Check that all of the operands before the midpoint come from the same
+ // unmerge and are in the same order as they are used in the build_vector
+ for (unsigned I = 0; I < NumUnmerge; ++I) {
+ auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg();
+ auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI);
+
+ if (Unmerge != MaybeUnmerge)
+ return false;
+
+ if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
+ return false;
+ }
+
+ // Check that all of the unmerged values are used
+ if (UnmergeEltCount != NumUnmerge)
+ return false;
+
+ // Check that all of the operands after the midpoint are undefs.
+ for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
+ auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);
+
+ if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
+ return false;
+ }
+
+ // G_UNMERGE_VALUES has exactly one source operand, so the last use is it
+ for (auto &UnmergeUse :
+ getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses())
+ UnmergeSrc = UnmergeUse.getReg();
+
+ return true;
+}
+
+void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ Register &UnmergeSrc) const {
+ assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
+ B.setInstrAndDebugLoc(MI);
+
+ Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
+ B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
+
+ MI.eraseFromParent();
+}
+
// This combine tries to reduce the number of scalarised G_TRUNC instructions by
// using vector truncates instead
//
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 5d6feeaa363e8..278314792bfb9 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -332,13 +332,6 @@ def combine_mul_cmlt : GICombineRule<
(apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
>;
-def combine_build_unmerge : GICombineRule<
- (defs root:$root, register_matchinfo:$unmergeSrc),
- (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
- [{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
- (apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
->;
-
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -373,7 +366,7 @@ def AArch64PostLegalizerCombiner
select_to_minmax, or_to_bsp, combine_concat_vector,
commute_constant_to_rhs, extract_vec_elt_combines,
push_freeze_to_prevent_poison_from_propagating,
- combine_mul_cmlt, combine_use_vector_truncate, combine_build_unmerge,
+ combine_mul_cmlt, combine_use_vector_truncate,
extmultomull, truncsat_combines, lshr_of_trunc_of_lshr,
funnel_shift_from_or_shift_constants_are_legal]> {
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 5d6b47806722d..9b07d84a292f5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -134,85 +134,6 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
}
-// This pattern aims to match the following shape to avoid extra mov
-// instructions
-// G_BUILD_VECTOR(
-// G_UNMERGE_VALUES(src, 0)
-// G_UNMERGE_VALUES(src, 1)
-// G_IMPLICIT_DEF
-// G_IMPLICIT_DEF
-// )
-// ->
-// G_CONCAT_VECTORS(
-// src,
-// undef
-// )
-bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
- Register &UnmergeSrc) {
- assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-
- unsigned BuildUseCount = MI.getNumOperands() - 1;
-
- if (BuildUseCount % 2 != 0)
- return false;
-
- unsigned HalfWayIndex = BuildUseCount / 2;
-
- // Check the first operand is an unmerge
- auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
- if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
- return false;
-
- // Check that the resultant concat will be legal
- auto UnmergeEltSize =
- MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
- auto UnmergeEltCount = MaybeUnmerge->getNumDefs();
-
- if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
- UnmergeEltSize * UnmergeEltCount != 128))
- return false;
-
- // Check that all of the operands before the midpoint come from the same
- // unmerge and are in the same order as they are used in the build_vector
- for (unsigned I = 0; I < HalfWayIndex; ++I) {
- auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg();
- auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI);
-
- if (Unmerge != MaybeUnmerge)
- return false;
-
- if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
- return false;
- }
-
- // Check that all of the operands after the mid point are undefs.
- for (unsigned I = HalfWayIndex; I < BuildUseCount; ++I) {
- auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);
-
- if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
- return false;
- }
-
- // Unmerge should only use one register so we can use the last one
- for (auto &UnmergeUse :
- getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses())
- UnmergeSrc = UnmergeUse.getReg();
-
- return true;
-}
-
-void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, Register &UnmergeSrc) {
- assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
- B.setInstrAndDebugLoc(MI);
-
- Register UndefVec = MRI.createGenericVirtualRegister(MRI.getType(UnmergeSrc));
- B.buildUndef(UndefVec);
- B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
-
- MI.eraseFromParent();
-}
-
bool matchAArch64MulConstCombine(
MachineInstr &MI, MachineRegisterInfo &MRI,
std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
>From aee79e7f70a21e2fb37a96b22351d311ca3fd0bc Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 13:51:54 +0000
Subject: [PATCH 6/9] Remove unused set import
---
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 9b07d84a292f5..73384f3b4798e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -39,7 +39,6 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
-#include <set>
#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
>From 8df8592bc5a7ddbef00e320632ac985f5e5d830f Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 15:46:41 +0000
Subject: [PATCH 7/9] Update missed test, check legality properly and remove
unnecessary loop
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 20 +++----
.../build-vector-packed-partial-undef.ll | 54 +++++--------------
2 files changed, 22 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7ade264cfc64..285b6393941a3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
@@ -3493,14 +3494,12 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
return false;
- // Check that the resultant concat will be legal
- auto UnmergeEltSize =
- MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
- auto UnmergeEltCount = MaybeUnmerge->getNumDefs();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT UnmergeSrcTy = MRI.getType(MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg());
- if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
- UnmergeEltSize * UnmergeEltCount != 128))
- return false;
+ // Ensure we only generate legal instructions post-legalizer
+ if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy, UnmergeSrcTy}}))
+ return false;
// Check that all of the operands before the midpoint come from the same
// unmerge and are in the same order as they are used in the build_vector
@@ -3516,7 +3515,7 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
}
// Check that all of the unmerged values are used
- if (UnmergeEltCount != NumUnmerge)
+ if (MaybeUnmerge->getNumDefs() != NumUnmerge)
return false;
// Check that all of the operands after the mid point are undefs.
@@ -3527,10 +3526,7 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
return false;
}
- // Unmerge should only use one register so we can use the last one
- for (auto &UnmergeUse :
- getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses())
- UnmergeSrc = UnmergeUse.getReg();
+ UnmergeSrc = MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg();
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
index c1b8bc6031b18..f7dbcd137e742 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -762,25 +762,13 @@ define void @undef_hi3_v4f16(half %arg0) {
}
define void @undef_hi2_v4i16(<2 x i16> %arg0) {
-; GFX8-SDAG-LABEL: undef_hi2_v4i16:
-; GFX8-SDAG: ; %bb.0:
-; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: ;;#ASMSTART
-; GFX8-SDAG-NEXT: ; use v[0:1]
-; GFX8-SDAG-NEXT: ;;#ASMEND
-; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: undef_hi2_v4i16:
-; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-GISEL-NEXT: ;;#ASMSTART
-; GFX8-GISEL-NEXT: ; use v[0:1]
-; GFX8-GISEL-NEXT: ;;#ASMEND
-; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX8-LABEL: undef_hi2_v4i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: undef_hi2_v4i16:
; GFX9: ; %bb.0:
@@ -803,25 +791,13 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
}
define void @undef_hi2_v4f16(<2 x half> %arg0) {
-; GFX8-SDAG-LABEL: undef_hi2_v4f16:
-; GFX8-SDAG: ; %bb.0:
-; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: ;;#ASMSTART
-; GFX8-SDAG-NEXT: ; use v[0:1]
-; GFX8-SDAG-NEXT: ;;#ASMEND
-; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: undef_hi2_v4f16:
-; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-GISEL-NEXT: ;;#ASMSTART
-; GFX8-GISEL-NEXT: ; use v[0:1]
-; GFX8-GISEL-NEXT: ;;#ASMEND
-; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX8-LABEL: undef_hi2_v4f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: undef_hi2_v4f16:
; GFX9: ; %bb.0:
@@ -842,5 +818,3 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX8: {{.*}}
>From c9ef45fa7a610fe7a29a9f247d9873a7d05815f8 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 15:50:50 +0000
Subject: [PATCH 8/9] Linting
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 285b6393941a3..394beeee1ab18 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3495,11 +3495,13 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
return false;
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- LLT UnmergeSrcTy = MRI.getType(MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg());
+ LLT UnmergeSrcTy = MRI.getType(
+ MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg());
// Ensure we only generate legal instructions post-legalizer
- if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy, UnmergeSrcTy}}))
- return false;
+ if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS,
+ {DstTy, UnmergeSrcTy, UnmergeSrcTy}}))
+ return false;
// Check that all of the operands before the midpoint come from the same
// unmerge and are in the same order as they are used in the build_vector
@@ -3526,7 +3528,8 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
return false;
}
- UnmergeSrc = MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg();
+ UnmergeSrc =
+ MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg();
return true;
}
>From c9cd6a0198407122573ca7a9be0e721e8dbf872c Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Fri, 7 Nov 2025 10:23:03 +0000
Subject: [PATCH 9/9] Use cast templates & convenience functions. Also simplify
legality check
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 39 +++++++++----------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 394beeee1ab18..fdd9d012c399e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3480,57 +3480,56 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
MachineRegisterInfo &MRI,
Register &UnmergeSrc) const {
- assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-
- unsigned BuildUseCount = MI.getNumOperands() - 1;
+ auto &BV = cast<GBuildVector>(MI);
+ unsigned BuildUseCount = BV.getNumSources();
if (BuildUseCount % 2 != 0)
return false;
unsigned NumUnmerge = BuildUseCount / 2;
- // Check the first operand is an unmerge
- auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
- if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
+
+ // Check the first operand is an unmerge and has the correct number of
+ // operands
+ if (!Unmerge || Unmerge->getNumOperands() != NumUnmerge + 1)
return false;
+ UnmergeSrc = Unmerge->getSourceReg();
+
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- LLT UnmergeSrcTy = MRI.getType(
- MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg());
+ LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
// Ensure we only generate legal instructions post-legalizer
- if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS,
- {DstTy, UnmergeSrcTy, UnmergeSrcTy}}))
+ if (!IsPreLegalize &&
+ !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
return false;
// Check that all of the operands before the midpoint come from the same
// unmerge and are in the same order as they are used in the build_vector
for (unsigned I = 0; I < NumUnmerge; ++I) {
- auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg();
- auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI);
+ auto MaybeUnmergeReg = BV.getSourceReg(I);
+ auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
- if (Unmerge != MaybeUnmerge)
+ if (!LoopUnmerge || LoopUnmerge != Unmerge)
return false;
- if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
+ if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
return false;
}
// Check that all of the unmerged values are used
- if (MaybeUnmerge->getNumDefs() != NumUnmerge)
+ if (Unmerge->getNumDefs() != NumUnmerge)
return false;
// Check that all of the operands after the mid point are undefs.
for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
- auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);
+ auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
return false;
}
- UnmergeSrc =
- MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg();
-
return true;
}
@@ -8510,4 +8509,4 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
}
return false;
-}
+}
\ No newline at end of file
More information about the llvm-commits
mailing list