[llvm] 4723f3c - [AMDGPU][GlobalISel] Combine unmerge of undef

Sebastian Neubauer via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 26 03:31:02 PST 2022


Author: Sebastian Neubauer
Date: 2022-01-26T12:30:36+01:00
New Revision: 4723f3cf03a90473dc9ae006e65d9019f00cb771

URL: https://github.com/llvm/llvm-project/commit/4723f3cf03a90473dc9ae006e65d9019f00cb771
DIFF: https://github.com/llvm/llvm-project/commit/4723f3cf03a90473dc9ae006e65d9019f00cb771.diff

LOG: [AMDGPU][GlobalISel] Combine unmerge of undef

Fold (unmerge undef) -> undef, undef, ...

Differential Revision: https://reviews.llvm.org/D118138

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 1d07d7d6e7ae..45c27c25aea0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -323,6 +323,11 @@ class CombinerHelper {
   void applyCombineUnmergeConstant(MachineInstr &MI,
                                    SmallVectorImpl<APInt> &Csts);
 
+  /// Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
+  bool
+  matchCombineUnmergeUndef(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
   bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
   void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);

diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9736e52a7b5b..4859cf6b57b7 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -535,6 +535,14 @@ def unmerge_cst : GICombineRule<
   (apply [{ Helper.applyCombineUnmergeConstant(*${d}, ${info}); }])
 >;
 
+// Fold (unmerge undef) -> undef, undef, ...
+def unmerge_undef : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_UNMERGE_VALUES): $root,
+         [{ return Helper.matchCombineUnmergeUndef(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
 // Transform x,y<dead> = unmerge z -> x = trunc z.
 def unmerge_dead_to_trunc : GICombineRule<
   (defs root:$d),
@@ -844,7 +852,8 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      propagate_undef_any_op,
                                      propagate_undef_all_ops,
                                      propagate_undef_shuffle_mask,
-                                     erase_undef_store]>;
+                                     erase_undef_store,
+                                     unmerge_undef]>;
 
 def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         binop_same_val, binop_left_to_zero,

diff  --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 4b5a19155c67..d6a009744161 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1748,6 +1748,20 @@ void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
   MI.eraseFromParent();
 }
 
+bool CombinerHelper::matchCombineUnmergeUndef(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  unsigned SrcIdx = MI.getNumOperands() - 1;
+  Register SrcReg = MI.getOperand(SrcIdx).getReg();
+  MatchInfo = [&MI](MachineIRBuilder &B) {
+    unsigned NumElems = MI.getNumOperands() - 1;
+    for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+      Register DstReg = MI.getOperand(Idx).getReg();
+      B.buildUndef(DstReg);
+    }
+  };
+  return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
+}
+
 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
          "Expected an unmerge");

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 0d8f48c1291c..0519a9c7db32 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -25,7 +25,6 @@ define amdgpu_kernel void @kernel_caller_stack() {
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 11
 ; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 12
-; MUBUF-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
@@ -38,7 +37,6 @@ define amdgpu_kernel void @kernel_caller_stack() {
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; FLATSCR-NEXT:    s_mov_b32 s32, 0
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s32
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 9
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 10
@@ -256,7 +254,6 @@ define void @func_caller_stack() {
 ; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 12
 ; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
-; MUBUF-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
@@ -281,7 +278,6 @@ define void @func_caller_stack() {
 ; FLATSCR-NEXT:    v_writelane_b32 v40, s33, 2
 ; FLATSCR-NEXT:    s_mov_b32 s33, s32
 ; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s32
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 9
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 10

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
new file mode 100644
index 000000000000..f0aa0b09a954
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
@@ -0,0 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: split_unmerge_undef
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: split_unmerge_undef
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-NEXT: {{  $}}
+    %ptr1:_(p1) = COPY $vgpr0_vgpr1
+    %ptr2:_(p1) = COPY $vgpr2_vgpr3
+    %ptr3:_(p1) = COPY $vgpr4_vgpr5
+    %vec:_(<3 x s32>) = G_IMPLICIT_DEF
+    %p1:_(s32), %p2:_(s32), %p3:_(s32) = G_UNMERGE_VALUES %vec
+    G_STORE %p1:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4)
+    G_STORE %p2:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4)
+    G_STORE %p3:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4)
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
index 4262fd9e8f2d..c0f96d210f58 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
@@ -11,12 +11,8 @@ body:             |
     ; GCN-LABEL: name: select_from_different_results_of_unmerge_values
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
-    ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV]], [[UV1]]
-    ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
     ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
     %0:_(<2 x s32>) = G_IMPLICIT_DEF
     %1:_(s32) = COPY $vgpr0
@@ -38,10 +34,8 @@ body:             |
     ; GCN-LABEL: name: select_from_same_results_of_unmerge_values
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[DEF]](<2 x s32>)
-    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
-    ; GCN-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
     ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
     %0:_(<2 x s32>) = G_IMPLICIT_DEF
     %1:_(s32) = COPY $vgpr0


        


More information about the llvm-commits mailing list