[llvm] 4723f3c - [AMDGPU][GlobalISel] Combine unmerge of undef
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 26 03:31:02 PST 2022
Author: Sebastian Neubauer
Date: 2022-01-26T12:30:36+01:00
New Revision: 4723f3cf03a90473dc9ae006e65d9019f00cb771
URL: https://github.com/llvm/llvm-project/commit/4723f3cf03a90473dc9ae006e65d9019f00cb771
DIFF: https://github.com/llvm/llvm-project/commit/4723f3cf03a90473dc9ae006e65d9019f00cb771.diff
LOG: [AMDGPU][GlobalISel] Combine unmerge of undef
Fold (unmerge undef) -> undef, undef, ...
Differential Revision: https://reviews.llvm.org/D118138
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 1d07d7d6e7ae..45c27c25aea0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -323,6 +323,11 @@ class CombinerHelper {
void applyCombineUnmergeConstant(MachineInstr &MI,
SmallVectorImpl<APInt> &Csts);
+ /// Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
+ bool
+ matchCombineUnmergeUndef(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
/// Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9736e52a7b5b..4859cf6b57b7 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -535,6 +535,14 @@ def unmerge_cst : GICombineRule<
(apply [{ Helper.applyCombineUnmergeConstant(*${d}, ${info}); }])
>;
+// Fold (unmerge undef) -> undef, undef, ...
+def unmerge_undef : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_UNMERGE_VALUES): $root,
+ [{ return Helper.matchCombineUnmergeUndef(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
// Transform x,y<dead> = unmerge z -> x = trunc z.
def unmerge_dead_to_trunc : GICombineRule<
(defs root:$d),
@@ -844,7 +852,8 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
propagate_undef_any_op,
propagate_undef_all_ops,
propagate_undef_shuffle_mask,
- erase_undef_store]>;
+ erase_undef_store,
+ unmerge_undef]>;
def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
binop_same_val, binop_left_to_zero,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 4b5a19155c67..d6a009744161 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1748,6 +1748,20 @@ void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
MI.eraseFromParent();
}
+bool CombinerHelper::matchCombineUnmergeUndef(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ unsigned SrcIdx = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ MatchInfo = [&MI](MachineIRBuilder &B) {
+ unsigned NumElems = MI.getNumOperands() - 1;
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ B.buildUndef(DstReg);
+ }
+ };
+ return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
+}
+
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 0d8f48c1291c..0519a9c7db32 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -25,7 +25,6 @@ define amdgpu_kernel void @kernel_caller_stack() {
; MUBUF-NEXT: v_mov_b32_e32 v1, 11
; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; MUBUF-NEXT: v_mov_b32_e32 v1, 12
-; MUBUF-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
@@ -38,7 +37,6 @@ define amdgpu_kernel void @kernel_caller_stack() {
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT: scratch_store_dword off, v0, s32
; FLATSCR-NEXT: v_mov_b32_e32 v0, 9
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:4
; FLATSCR-NEXT: v_mov_b32_e32 v0, 10
@@ -256,7 +254,6 @@ define void @func_caller_stack() {
; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; MUBUF-NEXT: v_mov_b32_e32 v1, 12
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
-; MUBUF-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
@@ -281,7 +278,6 @@ define void @func_caller_stack() {
; FLATSCR-NEXT: v_writelane_b32 v40, s33, 2
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
-; FLATSCR-NEXT: scratch_store_dword off, v0, s32
; FLATSCR-NEXT: v_mov_b32_e32 v0, 9
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:4
; FLATSCR-NEXT: v_mov_b32_e32 v0, 10
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
new file mode 100644
index 000000000000..f0aa0b09a954
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
@@ -0,0 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: split_unmerge_undef
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; CHECK-LABEL: name: split_unmerge_undef
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; CHECK-NEXT: {{ $}}
+ %ptr1:_(p1) = COPY $vgpr0_vgpr1
+ %ptr2:_(p1) = COPY $vgpr2_vgpr3
+ %ptr3:_(p1) = COPY $vgpr4_vgpr5
+ %vec:_(<3 x s32>) = G_IMPLICIT_DEF
+ %p1:_(s32), %p2:_(s32), %p3:_(s32) = G_UNMERGE_VALUES %vec
+ G_STORE %p1:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4)
+ G_STORE %p2:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4)
+ G_STORE %p3:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
index 4262fd9e8f2d..c0f96d210f58 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
@@ -11,12 +11,8 @@ body: |
; GCN-LABEL: name: select_from_different_results_of_unmerge_values
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV]], [[UV1]]
- ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%0:_(<2 x s32>) = G_IMPLICIT_DEF
%1:_(s32) = COPY $vgpr0
@@ -38,10 +34,8 @@ body: |
; GCN-LABEL: name: select_from_same_results_of_unmerge_values
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[DEF]](<2 x s32>)
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
- ; GCN-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%0:_(<2 x s32>) = G_IMPLICIT_DEF
%1:_(s32) = COPY $vgpr0
More information about the llvm-commits mailing list