[llvm] 64599ac - [MachineSink] Don't reject sinking because of dead def in isProfitableToSinkTo().
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Tue May 16 01:02:23 PDT 2023
Author: Jonas Paulsson
Date: 2023-05-16T10:00:44+02:00
New Revision: 64599ac97eb1346dcc0e45b722c4a84c8c72e899
URL: https://github.com/llvm/llvm-project/commit/64599ac97eb1346dcc0e45b722c4a84c8c72e899
DIFF: https://github.com/llvm/llvm-project/commit/64599ac97eb1346dcc0e45b722c4a84c8c72e899.diff
LOG: [MachineSink] Don't reject sinking because of dead def in isProfitableToSinkTo().
An instruction should be sunk (if otherwise legal and profitable) regardless
of whether it has a dead def of a physreg. Physreg defs are checked in other
places, and sinking is only done with dead defs of regs that are not live into
the target MBB.
Differential Revision: https://reviews.llvm.org/D150447
Reviewed By: sebastian-ne, arsenm
Added:
llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir
Modified:
llvm/lib/CodeGen/MachineSink.cpp
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 5047503f9011e..1bd370584fadb 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -806,12 +806,10 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
continue;
if (Reg.isPhysical()) {
- if (MO.isUse() &&
- (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
- continue;
-
- // Don't handle non-constant and non-ignorable physical register.
- return false;
+ // Don't handle non-constant and non-ignorable physical register uses.
+ if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
+ return false;
+ continue;
}
// Users for the defs are all dominated by SuccToSinkTo.
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index b79a729cb5fb9..a5edc2ea19362 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -861,6 +861,8 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-NEXT: .LBB5_2: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[14:15]
+; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5]
+; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_andn2_b64 exec, exec, s[12:13]
; GCN-NEXT: s_cbranch_execz .LBB5_7
@@ -873,12 +875,10 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-NEXT: ; %bb.4: ; %bb2
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
-; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5]
; GCN-NEXT: s_mov_b32 s9, s8
; GCN-NEXT: s_mov_b32 s10, s8
; GCN-NEXT: s_mov_b32 s11, s8
; GCN-NEXT: v_mov_b32_e32 v0, s8
-; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
; GCN-NEXT: v_mov_b32_e32 v1, s9
; GCN-NEXT: v_mov_b32_e32 v2, s10
; GCN-NEXT: v_mov_b32_e32 v3, s11
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index fdbd7ff8d652a..af4fcb4950b65 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -10,25 +10,26 @@ define void @needs_and(i32 %arg) {
; GCN-LABEL: needs_and:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s8, 1
+; GCN-NEXT: s_mov_b32 s10, 1
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %endif
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT: s_add_i32 s8, s8, 1
+; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT: s_and_b64 s[4:5], exec, vcc
+; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7]
+; GCN-NEXT: s_add_i32 s10, s10, 1
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execz .LBB0_4
; GCN-NEXT: .LBB0_2: ; %loop
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0
-; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s8, v0
-; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7]
-; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GCN-NEXT: v_cmp_gt_u32_e64 s[4:5], s10, v0
+; GCN-NEXT: v_cmp_le_u32_e32 vcc, s10, v0
+; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.3: ; %then
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 1
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT: s_branch .LBB0_1
; GCN-NEXT: .LBB0_4: ; %loopexit
@@ -107,13 +108,13 @@ define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
; GCN-NEXT: .LBB2_1: ; %endif
; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
+; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
; GCN-NEXT: s_add_i32 s10, s10, 1
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execz .LBB2_4
; GCN-NEXT: .LBB2_2: ; %loop
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
-; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s10, v0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_cbranch_execz .LBB2_1
diff --git a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
index 7738a2daecc9e..2d8680ea030ce 100644
--- a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
@@ -13,12 +13,12 @@ define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 i
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_waitcnt_depctr 0xffe3
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s8
+; GCN-NEXT: s_and_b32 s8, exec_lo, s6
+; GCN-NEXT: s_or_b32 s7, s8, s7
; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
; GCN-NEXT: s_cbranch_execz .LBB0_5
; GCN-NEXT: .LBB0_2: ; %bb
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT: s_and_b32 s8, exec_lo, s6
-; GCN-NEXT: s_or_b32 s7, s8, s7
; GCN-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.3: ; %bb1
diff --git a/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir b/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir
new file mode 100644
index 0000000000000..8516cc9114c76
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir
@@ -0,0 +1,57 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z15 -O3 -run-pass=machine-sink %s -o - \
+# RUN: -verify-machineinstrs | FileCheck %s
+#
+# Test that the AGHIK can be sunk into %bb.4. It has a def of CC, but it is dead.
+
+--- |
+ define void @fun() { ret void }
+...
+
+# CHECK-LABEL: bb.4:
+# CHECK: %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
+# CHECK-NEXT: CGHI %1, 0, implicit-def $cc
+# CHECK-NEXT: BRC 14, 6, %bb.1, implicit $cc
+# CHECK-NEXT: J %bb.5
+
+
+---
+name: fun
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr64bit }
+ - { id: 1, class: gr64bit }
+ - { id: 2, class: grx32bit }
+ - { id: 3, class: gr64bit }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+
+ %2:grx32bit = LHIMux 0
+ %3:gr64bit = IMPLICIT_DEF
+
+ bb.1:
+
+ %0:gr64bit = PHI %3, %bb.0, %1, %bb.4
+
+ bb.2:
+
+ %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
+ CHIMux %2, 0, implicit-def $cc
+ BRC 14, 6, %bb.4, implicit $cc
+ J %bb.3
+
+ bb.3:
+
+ bb.4:
+
+ CGHI %1, 0, implicit-def $cc
+ BRC 14, 6, %bb.1, implicit $cc
+ J %bb.5
+
+ bb.5:
+ Return
+
+...
diff --git a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index 6f9d13cde6b2e..ca839bbb0dced 100644
--- a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -102,17 +102,17 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
; CHECK-NEXT: jns .LBB0_20
; CHECK-NEXT: .LBB0_5: # %a50b
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: shrl $31, %r9d
; CHECK-NEXT: movl %eax, %r10d
; CHECK-NEXT: orl %esi, %r10d
; CHECK-NEXT: jns .LBB0_26
; CHECK-NEXT: .LBB0_6: # %a57b
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: shrl $31, %r10d
+; CHECK-NEXT: shrl $31, %r9d
; CHECK-NEXT: testb %r9b, %r9b
; CHECK-NEXT: je .LBB0_30
; CHECK-NEXT: .LBB0_7: # %a66b
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: shrl $31, %r10d
; CHECK-NEXT: testb %r10b, %r10b
; CHECK-NEXT: jne .LBB0_8
; CHECK-NEXT: .p2align 4, 0x90
diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
index dead31a8ba013..68bdb9235546b 100644
--- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -196,10 +196,8 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: leaq -4(%rcx), %r8
-; CHECK-NEXT: movq %r8, %r11
-; CHECK-NEXT: shrq $2, %r11
-; CHECK-NEXT: btl $2, %r8d
+; CHECK-NEXT: leaq -4(%rcx), %r11
+; CHECK-NEXT: btl $2, %r11d
; CHECK-NEXT: jb .LBB1_8
; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
@@ -208,12 +206,12 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: movdqu %xmm0, (%r13,%rbp,8)
; CHECK-NEXT: movdqu %xmm0, 16(%r13,%rbp,8)
; CHECK-NEXT: movl $4, %r10d
-; CHECK-NEXT: testq %r11, %r11
+; CHECK-NEXT: shrq $2, %r11
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: xorl %r10d, %r10d
-; CHECK-NEXT: testq %r11, %r11
+; CHECK-NEXT: shrq $2, %r11
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
diff --git a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
index 9c881dd816994..2a98e3dd91ee1 100644
--- a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
+++ b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
@@ -60,10 +60,11 @@ zero:
; CHECK: JMP_1 %bb.4
; CHECK: bb.4
; CHECK: bb.5
-; CHECK: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
-; CHECK-LV: %3:gr64 = COPY killed %10
-; CHECK-LIS: %3:gr64 = COPY %10
-; CHECK-LV: TEST64rr killed %1, %1, implicit-def $eflags
+; CHECK: %3:gr64 = COPY %10
+; CHECK-LV: %4:gr64 = COPY killed %10
+; CHECK-LV: %4:gr64 = nuw ADD64ri8 %4, 8, implicit-def dead $eflags
+; CHECK-LIS: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
+; CHECK: TEST64rr killed %1, %1, implicit-def $eflags
; CHECK: JCC_1 %bb.1, 5, implicit killed $eflags
; CHECK: JMP_1 %bb.6
define void @test2(i8 addrspace(1)* %this, i32 %0, i32 addrspace(1)* %p0, i8 addrspace(1)* %p1) gc "statepoint-example" personality i32* ()* @fake_personality_function {
More information about the llvm-commits
mailing list