[llvm] [AArch64][GlobalISel] Add combine for build_vector(unmerge, unmerge, undef, undef) (PR #165539)

Ryan Cowan via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 3 07:48:43 PST 2025


https://github.com/HolyMolyCowMan updated https://github.com/llvm/llvm-project/pull/165539

>From 3afd4eba40468069f4ceb95f7470e4344e9fc5b1 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Wed, 29 Oct 2025 10:26:54 +0000
Subject: [PATCH 1/7] [AArch64][GlobalISel] Add combine for
 build_vector(unmerge, unmerge, undef, undef)

---
 llvm/lib/Target/AArch64/AArch64Combine.td     |  9 +-
 .../GISel/AArch64PostLegalizerCombiner.cpp    | 96 ++++++++++++++++++-
 llvm/test/CodeGen/AArch64/fptrunc.ll          | 18 +---
 llvm/test/CodeGen/AArch64/itofp.ll            | 48 +++-------
 4 files changed, 120 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 278314792bfb9..056e6145487d2 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -332,6 +332,13 @@ def combine_mul_cmlt : GICombineRule<
   (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
+def combine_build_unmerge : GICombineRule<
+  (defs root:$root, register_matchinfo:$unmergeSrc),
+  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+         [{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
+  (apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -366,7 +373,7 @@ def AArch64PostLegalizerCombiner
                         select_to_minmax, or_to_bsp, combine_concat_vector,
                         commute_constant_to_rhs, extract_vec_elt_combines,
                         push_freeze_to_prevent_poison_from_propagating,
-                        combine_mul_cmlt, combine_use_vector_truncate,
+                        combine_mul_cmlt, combine_use_vector_truncate, combine_build_unmerge,
                         extmultomull, truncsat_combines, lshr_of_trunc_of_lshr,
                         funnel_shift_from_or_shift_constants_are_legal]> {
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index fa7bb6ecc35ee..2f17fd33559ee 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -39,6 +39,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
+#include <set>
 
 #define GET_GICOMBINER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
@@ -133,6 +134,99 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
   return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
 }
 
+// This pattern aims to match the following shape to avoid extra mov
+// instructions
+// G_BUILD_VECTOR(
+//   G_UNMERGE_VALUES(src, 0)
+//   G_UNMERGE_VALUES(src, 1)
+//   G_IMPLICIT_DEF
+//   G_IMPLICIT_DEF
+// )
+// ->
+// G_CONCAT_VECTORS(
+//   undef
+//   src
+// )
+bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              Register &UnmergeSrc) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+
+  unsigned UnmergeInstrCount = 0;
+  unsigned UndefInstrCount = 0;
+
+  unsigned UnmergeEltCount = 0;
+  unsigned UnmergeEltSize = 0;
+
+  Register UnmergeSrcTemp;
+
+  std::set<int> KnownRegs;
+
+  for (auto Use : MI.all_uses()) {
+    auto *Def = getDefIgnoringCopies(Use.getReg(), MRI);
+
+    if (!Def) {
+      return false;
+    }
+
+    unsigned Opcode = Def->getOpcode();
+
+    switch (Opcode) {
+    default:
+      return false;
+    case TargetOpcode::G_IMPLICIT_DEF:
+      ++UndefInstrCount;
+      break;
+    case TargetOpcode::G_UNMERGE_VALUES:
+      ++UnmergeInstrCount;
+
+      UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits();
+      UnmergeEltCount = Def->getNumDefs();
+      if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
+                                  UnmergeEltSize * UnmergeEltCount != 128)) {
+        return false;
+      }
+
+      // Unmerge should only use one register so we can use the last one
+      for (auto UnmergeUse : Def->all_uses())
+        UnmergeSrcTemp = UnmergeUse.getReg();
+
+      // Track unique sources for the G_UNMERGE_VALUES
+      unsigned RegId = UnmergeSrcTemp.id();
+      if (KnownRegs.find(RegId) != KnownRegs.end())
+        continue;
+
+      KnownRegs.insert(RegId);
+
+      // We know the unmerge is a valid target now so store the register.
+      UnmergeSrc = UnmergeSrcTemp;
+
+      break;
+    }
+  }
+
+  // Only want to match patterns that pad half of a vector with undefined. We
+  // also want to ensure that these values come from a single unmerge and all
+  // unmerged values are consumed.
+  if (UndefInstrCount != UnmergeInstrCount ||
+      UnmergeEltCount != UnmergeInstrCount || KnownRegs.size() != 1) {
+    return false;
+  }
+
+  return true;
+}
+
+void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MachineIRBuilder &B, Register &UnmergeSrc) {
+  assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
+  B.setInstrAndDebugLoc(MI);
+
+  Register UndefVec = MRI.createGenericVirtualRegister(MRI.getType(UnmergeSrc));
+  B.buildUndef(UndefVec);
+  B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
+
+  MI.eraseFromParent();
+}
+
 bool matchAArch64MulConstCombine(
     MachineInstr &MI, MachineRegisterInfo &MRI,
     std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
@@ -890,4 +984,4 @@ namespace llvm {
 FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
   return new AArch64PostLegalizerCombiner(IsOptNone);
 }
-} // end namespace llvm
+} // end namespace llvm
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
index 1f84c944d7c16..ce1a2fc48c2e7 100644
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -345,19 +345,11 @@ entry:
 }
 
 define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
-; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-GI-NEXT:    fcvtn v0.4h, v1.4s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fptrunc_v2f32_v2f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %c = fptrunc <2 x float> %a to <2 x half>
   ret <2 x half> %c
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index caf87a13f283b..6d168edf180a4 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-NOFP16-GI:       // %bb.0: // %entry
 ; CHECK-NOFP16-GI-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-NOFP16-GI-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NOFP16-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-FP16-GI-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-FP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-FP16-GI-NEXT:    ret
 entry:
   %c = sitofp <2 x i64> %a to <2 x half>
@@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-NOFP16-GI:       // %bb.0: // %entry
 ; CHECK-NOFP16-GI-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-NOFP16-GI-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NOFP16-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-FP16-GI-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-FP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-FP16-GI-NEXT:    ret
 entry:
   %c = uitofp <2 x i64> %a to <2 x half>
@@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
 ; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
 ; CHECK-NOFP16-GI:       // %bb.0: // %entry
 ; CHECK-NOFP16-GI-NEXT:    scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NOFP16-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    scvtf v0.2s, v0.2s
-; CHECK-FP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-FP16-GI-NEXT:    ret
 entry:
   %c = sitofp <2 x i32> %a to <2 x half>
@@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
 ; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
 ; CHECK-NOFP16-GI:       // %bb.0: // %entry
 ; CHECK-NOFP16-GI-NEXT:    ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NOFP16-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    ucvtf v0.2s, v0.2s
-; CHECK-FP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-FP16-GI-NEXT:    ret
 entry:
   %c = uitofp <2 x i32> %a to <2 x half>
@@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
 ; CHECK-NOFP16-GI-NEXT:    shl v0.2s, v0.2s, #16
 ; CHECK-NOFP16-GI-NEXT:    sshr v0.2s, v0.2s, #16
 ; CHECK-NOFP16-GI-NEXT:    scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NOFP16-GI-NEXT:    ret
 entry:
   %c = sitofp <2 x i16> %a to <2 x half>
@@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
 ; CHECK-NOFP16-GI-NEXT:    movi d1, #0x00ffff0000ffff
 ; CHECK-NOFP16-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-NOFP16-GI-NEXT:    ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NOFP16-GI-NEXT:    ret
 entry:
   %c = uitofp <2 x i16> %a to <2 x half>
@@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-NOFP16-GI-NEXT:    shl v0.2s, v0.2s, #24
 ; CHECK-NOFP16-GI-NEXT:    sshr v0.2s, v0.2s, #24
 ; CHECK-NOFP16-GI-NEXT:    scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NOFP16-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
@@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-NOFP16-GI-NEXT:    movi d1, #0x0000ff000000ff
 ; CHECK-NOFP16-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-NOFP16-GI-NEXT:    ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NOFP16-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:

>From 372742e8a80a52bbd5c4f779bc94266f4b0bbe4a Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Wed, 29 Oct 2025 11:15:21 +0000
Subject: [PATCH 2/7] Fix unnecessary copying caused by using auto.

---
 .../lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 2f17fd33559ee..c768d4b7a9a1c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -161,7 +161,7 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
 
   std::set<int> KnownRegs;
 
-  for (auto Use : MI.all_uses()) {
+  for (auto &Use : MI.all_uses()) {
     auto *Def = getDefIgnoringCopies(Use.getReg(), MRI);
 
     if (!Def) {
@@ -187,7 +187,7 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
       }
 
       // Unmerge should only use one register so we can use the last one
-      for (auto UnmergeUse : Def->all_uses())
+      for (auto &UnmergeUse : Def->all_uses())
         UnmergeSrcTemp = UnmergeUse.getReg();
 
       // Track unique sources for the G_UNMERGE_VALUES

>From a0376ab6ba0a31cd6f318fee1c8e210503dac715 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Thu, 30 Oct 2025 12:06:59 +0000
Subject: [PATCH 3/7] Remove wip_match_opcode and check operand order and
 consecutiveness

---
 llvm/lib/Target/AArch64/AArch64Combine.td     |  2 +-
 .../GISel/AArch64PostLegalizerCombiner.cpp    | 36 +++++++++++++++----
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 056e6145487d2..5d6feeaa363e8 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -334,7 +334,7 @@ def combine_mul_cmlt : GICombineRule<
 
 def combine_build_unmerge : GICombineRule<
   (defs root:$root, register_matchinfo:$unmergeSrc),
-  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+  (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
          [{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
   (apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
 >;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index c768d4b7a9a1c..33de0af5808e2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -144,20 +144,24 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
 // )
 // ->
 // G_CONCAT_VECTORS(
+//   src,
 //   undef
-//   src
 // )
 bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                               Register &UnmergeSrc) {
   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
 
-  unsigned UnmergeInstrCount = 0;
+  unsigned UnmergeUseCount = 0;
   unsigned UndefInstrCount = 0;
 
   unsigned UnmergeEltCount = 0;
   unsigned UnmergeEltSize = 0;
 
+  unsigned BuildOperandCount = MI.getNumOperands();
+  bool EncounteredUndef = false;
+
   Register UnmergeSrcTemp;
+  MachineInstr *UnmergeInstr;
 
   std::set<int> KnownRegs;
 
@@ -170,14 +174,21 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
 
     unsigned Opcode = Def->getOpcode();
 
+    // Ensure that the unmerged instructions are consecutive and before the
+    // undefined values by checking we don't encounter an undef before we reach
+    // half way
+    if (EncounteredUndef && UnmergeUseCount < BuildOperandCount / 2)
+      return false;
+
     switch (Opcode) {
     default:
       return false;
     case TargetOpcode::G_IMPLICIT_DEF:
       ++UndefInstrCount;
+      EncounteredUndef = true;
       break;
     case TargetOpcode::G_UNMERGE_VALUES:
-      ++UnmergeInstrCount;
+      ++UnmergeUseCount;
 
       UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits();
       UnmergeEltCount = Def->getNumDefs();
@@ -197,8 +208,10 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
 
       KnownRegs.insert(RegId);
 
-      // We know the unmerge is a valid target now so store the register.
+      // We know the unmerge is a valid target now so store the register & the
+      // instruction.
       UnmergeSrc = UnmergeSrcTemp;
+      UnmergeInstr = Def;
 
       break;
     }
@@ -207,11 +220,22 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
   // Only want to match patterns that pad half of a vector with undefined. We
   // also want to ensure that these values come from a single unmerge and all
   // unmerged values are consumed.
-  if (UndefInstrCount != UnmergeInstrCount ||
-      UnmergeEltCount != UnmergeInstrCount || KnownRegs.size() != 1) {
+  if (UndefInstrCount != UnmergeUseCount ||
+      UnmergeEltCount != UnmergeUseCount || KnownRegs.size() != 1) {
     return false;
   }
 
+  // Check the operands of the unmerge are used in the same order they are
+  // defined G_BUILD_VECTOR always defines 1 output so we know the uses start
+  // from index 1
+  for (unsigned OperandIndex = 0; OperandIndex < UnmergeUseCount;
+       ++OperandIndex) {
+    Register BuildReg = MI.getOperand(OperandIndex + 1).getReg();
+    Register UnmergeReg = UnmergeInstr->getOperand(OperandIndex).getReg();
+    if (BuildReg != UnmergeReg)
+      return false;
+  }
+
   return true;
 }
 

>From 3f8d7da31402bf0af864060f60442b1d51955d40 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 10:12:49 +0000
Subject: [PATCH 4/7] Simplify logic & check for legality of resulting
 concatenation

---
 .../GISel/AArch64PostLegalizerCombiner.cpp    | 100 ++++++------------
 1 file changed, 31 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 33de0af5808e2..5d6b47806722d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -151,91 +151,53 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                               Register &UnmergeSrc) {
   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
 
-  unsigned UnmergeUseCount = 0;
-  unsigned UndefInstrCount = 0;
+  unsigned BuildUseCount = MI.getNumOperands() - 1;
 
-  unsigned UnmergeEltCount = 0;
-  unsigned UnmergeEltSize = 0;
-
-  unsigned BuildOperandCount = MI.getNumOperands();
-  bool EncounteredUndef = false;
+  if (BuildUseCount % 2 != 0)
+    return false;
 
-  Register UnmergeSrcTemp;
-  MachineInstr *UnmergeInstr;
+  unsigned HalfWayIndex = BuildUseCount / 2;
 
-  std::set<int> KnownRegs;
+  // Check the first operand is an unmerge
+  auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+  if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+    return false;
 
-  for (auto &Use : MI.all_uses()) {
-    auto *Def = getDefIgnoringCopies(Use.getReg(), MRI);
+  // Check that the resultant concat will be legal
+  auto UnmergeEltSize =
+      MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
+  auto UnmergeEltCount = MaybeUnmerge->getNumDefs();
 
-    if (!Def) {
-      return false;
-    }
+  if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
+                              UnmergeEltSize * UnmergeEltCount != 128))
+    return false;
 
-    unsigned Opcode = Def->getOpcode();
+  // Check that all of the operands before the midpoint come from the same
+  // unmerge and are in the same order as they are used in the build_vector
+  for (unsigned I = 0; I < HalfWayIndex; ++I) {
+    auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg();
+    auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI);
 
-    // Ensure that the unmerged instructions are consecutive and before the
-    // undefined values by checking we don't encounter an undef before we reach
-    // half way
-    if (EncounteredUndef && UnmergeUseCount < BuildOperandCount / 2)
+    if (Unmerge != MaybeUnmerge)
       return false;
 
-    switch (Opcode) {
-    default:
+    if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
       return false;
-    case TargetOpcode::G_IMPLICIT_DEF:
-      ++UndefInstrCount;
-      EncounteredUndef = true;
-      break;
-    case TargetOpcode::G_UNMERGE_VALUES:
-      ++UnmergeUseCount;
-
-      UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits();
-      UnmergeEltCount = Def->getNumDefs();
-      if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
-                                  UnmergeEltSize * UnmergeEltCount != 128)) {
-        return false;
-      }
-
-      // Unmerge should only use one register so we can use the last one
-      for (auto &UnmergeUse : Def->all_uses())
-        UnmergeSrcTemp = UnmergeUse.getReg();
-
-      // Track unique sources for the G_UNMERGE_VALUES
-      unsigned RegId = UnmergeSrcTemp.id();
-      if (KnownRegs.find(RegId) != KnownRegs.end())
-        continue;
-
-      KnownRegs.insert(RegId);
-
-      // We know the unmerge is a valid target now so store the register & the
-      // instruction.
-      UnmergeSrc = UnmergeSrcTemp;
-      UnmergeInstr = Def;
-
-      break;
-    }
   }
 
-  // Only want to match patterns that pad half of a vector with undefined. We
-  // also want to ensure that these values come from a single unmerge and all
-  // unmerged values are consumed.
-  if (UndefInstrCount != UnmergeUseCount ||
-      UnmergeEltCount != UnmergeUseCount || KnownRegs.size() != 1) {
-    return false;
-  }
+  // Check that all of the operands after the mid point are undefs.
+  for (unsigned I = HalfWayIndex; I < BuildUseCount; ++I) {
+    auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);
 
-  // Check the operands of the unmerge are used in the same order they are
-  // defined G_BUILD_VECTOR always defines 1 output so we know the uses start
-  // from index 1
-  for (unsigned OperandIndex = 0; OperandIndex < UnmergeUseCount;
-       ++OperandIndex) {
-    Register BuildReg = MI.getOperand(OperandIndex + 1).getReg();
-    Register UnmergeReg = UnmergeInstr->getOperand(OperandIndex).getReg();
-    if (BuildReg != UnmergeReg)
+    if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
       return false;
   }
 
+  // Unmerge should only use one register so we can use the last one
+  for (auto &UnmergeUse :
+       getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses())
+    UnmergeSrc = UnmergeUse.getReg();
+
   return true;
 }
 

>From 69ba5ab26d6bad07c0e0c655d55733160ff7186f Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 10:13:58 +0000
Subject: [PATCH 5/7] Make this a non-backend-specific optimisation & check all
 unmerged values are used

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  6 ++
 .../include/llvm/Target/GlobalISel/Combine.td | 12 ++-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 85 +++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64Combine.td     |  9 +-
 .../GISel/AArch64PostLegalizerCombiner.cpp    | 79 -----------------
 5 files changed, 103 insertions(+), 88 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 36cb90b1bc134..968bbbf778254 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -647,6 +647,12 @@ class CombinerHelper {
   bool matchRotateOutOfRange(MachineInstr &MI) const;
   void applyRotateOutOfRange(MachineInstr &MI) const;
 
+  bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                Register &UnmergeSrc) const;
+  void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                MachineIRBuilder &B,
+                                Register &UnmergeSrc) const;
+
   bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
   void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 119695e53c3cb..0ab2d9487a295 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule <
          [{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>;
 
+// Transform build_vector(unmerge(src, 0), ... unmerge(src, n), undef, ..., undef)
+// => concat_vectors(src, undef)
+def combine_build_unmerge : GICombineRule<
+  (defs root:$root, register_matchinfo:$unmergeSrc),
+  (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
+         [{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
+  (apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
+>;
+
 def merge_combines: GICombineGroup<[
   unmerge_anyext_build_vector,
   unmerge_merge,
@@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[
   unmerge_dead_to_trunc,
   unmerge_zext_to_zext,
   merge_of_x_and_undef,
-  merge_of_x_and_zero
+  merge_of_x_and_zero,
+  combine_build_unmerge
 ]>;
 
 // Under certain conditions, transform:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9ace7d65413ad..b7ade264cfc64 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3463,6 +3463,91 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
          isConstTrueVal(TLI, Cst, IsVector, IsFP);
 }
 
+// This pattern aims to match the following shape to avoid extra mov
+// instructions
+// G_BUILD_VECTOR(
+//   G_UNMERGE_VALUES(src, 0)
+//   G_UNMERGE_VALUES(src, 1)
+//   G_IMPLICIT_DEF
+//   G_IMPLICIT_DEF
+// )
+// ->
+// G_CONCAT_VECTORS(
+//   src,
+//   undef
+// )
+bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
+                                              MachineRegisterInfo &MRI,
+                                              Register &UnmergeSrc) const {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+
+  unsigned BuildUseCount = MI.getNumOperands() - 1;
+
+  if (BuildUseCount % 2 != 0)
+    return false;
+
+  unsigned NumUnmerge = BuildUseCount / 2;
+
+  // Check the first operand is an unmerge
+  auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+  if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+    return false;
+
+  // Check that the resultant concat will be legal
+  auto UnmergeEltSize =
+      MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
+  auto UnmergeEltCount = MaybeUnmerge->getNumDefs();
+
+  if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
+                              UnmergeEltSize * UnmergeEltCount != 128))
+    return false;
+
+  // Check that all of the operands before the midpoint come from the same
+  // unmerge and are in the same order as they are used in the build_vector
+  for (unsigned I = 0; I < NumUnmerge; ++I) {
+    auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg();
+    auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI);
+
+    if (Unmerge != MaybeUnmerge)
+      return false;
+
+    if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
+      return false;
+  }
+
+  // Check that all of the unmerged values are used
+  if (UnmergeEltCount != NumUnmerge)
+    return false;
+
+  // Check that all of the operands after the mid point are undefs.
+  for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
+    auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);
+
+    if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
+      return false;
+  }
+
+  // Unmerge should only use one register so we can use the last one
+  for (auto &UnmergeUse :
+       getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses())
+    UnmergeSrc = UnmergeUse.getReg();
+
+  return true;
+}
+
+void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
+                                              MachineRegisterInfo &MRI,
+                                              MachineIRBuilder &B,
+                                              Register &UnmergeSrc) const {
+  assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
+  B.setInstrAndDebugLoc(MI);
+
+  Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
+  B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
+
+  MI.eraseFromParent();
+}
+
 // This combine tries to reduce the number of scalarised G_TRUNC instructions by
 // using vector truncates instead
 //
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 5d6feeaa363e8..278314792bfb9 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -332,13 +332,6 @@ def combine_mul_cmlt : GICombineRule<
   (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
-def combine_build_unmerge : GICombineRule<
-  (defs root:$root, register_matchinfo:$unmergeSrc),
-  (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
-         [{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
-  (apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
->;
-
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -373,7 +366,7 @@ def AArch64PostLegalizerCombiner
                         select_to_minmax, or_to_bsp, combine_concat_vector,
                         commute_constant_to_rhs, extract_vec_elt_combines,
                         push_freeze_to_prevent_poison_from_propagating,
-                        combine_mul_cmlt, combine_use_vector_truncate, combine_build_unmerge,
+                        combine_mul_cmlt, combine_use_vector_truncate,
                         extmultomull, truncsat_combines, lshr_of_trunc_of_lshr,
                         funnel_shift_from_or_shift_constants_are_legal]> {
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 5d6b47806722d..9b07d84a292f5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -134,85 +134,6 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
   return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
 }
 
-// This pattern aims to match the following shape to avoid extra mov
-// instructions
-// G_BUILD_VECTOR(
-//   G_UNMERGE_VALUES(src, 0)
-//   G_UNMERGE_VALUES(src, 1)
-//   G_IMPLICIT_DEF
-//   G_IMPLICIT_DEF
-// )
-// ->
-// G_CONCAT_VECTORS(
-//   src,
-//   undef
-// )
-bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
-                              Register &UnmergeSrc) {
-  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-
-  unsigned BuildUseCount = MI.getNumOperands() - 1;
-
-  if (BuildUseCount % 2 != 0)
-    return false;
-
-  unsigned HalfWayIndex = BuildUseCount / 2;
-
-  // Check the first operand is an unmerge
-  auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
-  if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
-    return false;
-
-  // Check that the resultant concat will be legal
-  auto UnmergeEltSize =
-      MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
-  auto UnmergeEltCount = MaybeUnmerge->getNumDefs();
-
-  if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
-                              UnmergeEltSize * UnmergeEltCount != 128))
-    return false;
-
-  // Check that all of the operands before the midpoint come from the same
-  // unmerge and are in the same order as they are used in the build_vector
-  for (unsigned I = 0; I < HalfWayIndex; ++I) {
-    auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg();
-    auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI);
-
-    if (Unmerge != MaybeUnmerge)
-      return false;
-
-    if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
-      return false;
-  }
-
-  // Check that all of the operands after the mid point are undefs.
-  for (unsigned I = HalfWayIndex; I < BuildUseCount; ++I) {
-    auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);
-
-    if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
-      return false;
-  }
-
-  // Unmerge should only use one register so we can use the last one
-  for (auto &UnmergeUse :
-       getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses())
-    UnmergeSrc = UnmergeUse.getReg();
-
-  return true;
-}
-
-void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
-                              MachineIRBuilder &B, Register &UnmergeSrc) {
-  assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
-  B.setInstrAndDebugLoc(MI);
-
-  Register UndefVec = MRI.createGenericVirtualRegister(MRI.getType(UnmergeSrc));
-  B.buildUndef(UndefVec);
-  B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
-
-  MI.eraseFromParent();
-}
-
 bool matchAArch64MulConstCombine(
     MachineInstr &MI, MachineRegisterInfo &MRI,
     std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {

>From aee79e7f70a21e2fb37a96b22351d311ca3fd0bc Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 13:51:54 +0000
Subject: [PATCH 6/7] Remove unused set import

---
 llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 9b07d84a292f5..73384f3b4798e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -39,7 +39,6 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
-#include <set>
 
 #define GET_GICOMBINER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"

>From 8df8592bc5a7ddbef00e320632ac985f5e5d830f Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 3 Nov 2025 15:46:41 +0000
Subject: [PATCH 7/7] Update missed test, check legality properly and remove
 unnecessary loop

---
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 20 +++----
 .../build-vector-packed-partial-undef.ll      | 54 +++++--------------
 2 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7ade264cfc64..285b6393941a3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
@@ -3493,14 +3494,12 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
   if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
     return false;
 
-  // Check that the resultant concat will be legal
-  auto UnmergeEltSize =
-      MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
-  auto UnmergeEltCount = MaybeUnmerge->getNumDefs();
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT UnmergeSrcTy = MRI.getType(MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg());
 
-  if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
-                              UnmergeEltSize * UnmergeEltCount != 128))
-    return false;
+  // Ensure we only generate legal instructions post-legalizer
+  if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy, UnmergeSrcTy}}))
+      return false;
 
   // Check that all of the operands before the midpoint come from the same
   // unmerge and are in the same order as they are used in the build_vector
@@ -3516,7 +3515,7 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
   }
 
   // Check that all of the unmerged values are used
-  if (UnmergeEltCount != NumUnmerge)
+  if (MaybeUnmerge->getNumDefs() != NumUnmerge)
     return false;
 
   // Check that all of the operands after the mid point are undefs.
@@ -3527,10 +3526,7 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
       return false;
   }
 
-  // Unmerge should only use one register so we can use the last one
-  for (auto &UnmergeUse :
-       getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses())
-    UnmergeSrc = UnmergeUse.getReg();
+  UnmergeSrc = MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg();
 
   return true;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
index c1b8bc6031b18..f7dbcd137e742 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -762,25 +762,13 @@ define void @undef_hi3_v4f16(half %arg0) {
 }
 
 define void @undef_hi2_v4i16(<2 x i16> %arg0) {
-; GFX8-SDAG-LABEL: undef_hi2_v4i16:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    ;;#ASMSTART
-; GFX8-SDAG-NEXT:    ; use v[0:1]
-; GFX8-SDAG-NEXT:    ;;#ASMEND
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: undef_hi2_v4i16:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX8-GISEL-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8-GISEL-NEXT:    ;;#ASMSTART
-; GFX8-GISEL-NEXT:    ; use v[0:1]
-; GFX8-GISEL-NEXT:    ;;#ASMEND
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: undef_hi2_v4i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    ;;#ASMSTART
+; GFX8-NEXT:    ; use v[0:1]
+; GFX8-NEXT:    ;;#ASMEND
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: undef_hi2_v4i16:
 ; GFX9:       ; %bb.0:
@@ -803,25 +791,13 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
 }
 
 define void @undef_hi2_v4f16(<2 x half> %arg0) {
-; GFX8-SDAG-LABEL: undef_hi2_v4f16:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    ;;#ASMSTART
-; GFX8-SDAG-NEXT:    ; use v[0:1]
-; GFX8-SDAG-NEXT:    ;;#ASMEND
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: undef_hi2_v4f16:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX8-GISEL-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8-GISEL-NEXT:    ;;#ASMSTART
-; GFX8-GISEL-NEXT:    ; use v[0:1]
-; GFX8-GISEL-NEXT:    ;;#ASMEND
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: undef_hi2_v4f16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    ;;#ASMSTART
+; GFX8-NEXT:    ; use v[0:1]
+; GFX8-NEXT:    ;;#ASMEND
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: undef_hi2_v4f16:
 ; GFX9:       ; %bb.0:
@@ -842,5 +818,3 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
   call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX8: {{.*}}



More information about the llvm-commits mailing list