[llvm] 64599ac - [MachineSink] Don't reject sinking because of dead def in isProfitableToSinkTo().

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Tue May 16 01:02:23 PDT 2023


Author: Jonas Paulsson
Date: 2023-05-16T10:00:44+02:00
New Revision: 64599ac97eb1346dcc0e45b722c4a84c8c72e899

URL: https://github.com/llvm/llvm-project/commit/64599ac97eb1346dcc0e45b722c4a84c8c72e899
DIFF: https://github.com/llvm/llvm-project/commit/64599ac97eb1346dcc0e45b722c4a84c8c72e899.diff

LOG: [MachineSink] Don't reject sinking because of dead def in isProfitableToSinkTo().

An instruction should be sunk (if otherwise legal and profitable) regardless
of whether it has a dead def of a physreg. Physreg defs are checked in other
places, and sinking is only done with dead defs of regs that are not live into
the target MBB.

Differential Revision: https://reviews.llvm.org/D150447

Reviewed By: sebastian-ne, arsenm

Added: 
    llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir

Modified: 
    llvm/lib/CodeGen/MachineSink.cpp
    llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
    llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
    llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
    llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
    llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
    llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 5047503f9011e..1bd370584fadb 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -806,12 +806,10 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
       continue;
 
     if (Reg.isPhysical()) {
-      if (MO.isUse() &&
-          (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
-        continue;
-
-      // Don't handle non-constant and non-ignorable physical register.
-      return false;
+      // Don't handle non-constant and non-ignorable physical register uses.
+      if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
+        return false;
+      continue;
     }
 
     // Users for the defs are all dominated by SuccToSinkTo.

diff  --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index b79a729cb5fb9..a5edc2ea19362 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -861,6 +861,8 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-NEXT:  .LBB5_2: ; %bb10
 ; GCN-NEXT:    ; in Loop: Header=BB5_3 Depth=1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[14:15]
+; GCN-NEXT:    s_and_b64 s[6:7], exec, s[4:5]
+; GCN-NEXT:    s_or_b64 s[12:13], s[6:7], s[12:13]
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[12:13]
 ; GCN-NEXT:    s_cbranch_execz .LBB5_7
@@ -873,12 +875,10 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-NEXT:  ; %bb.4: ; %bb2
 ; GCN-NEXT:    ; in Loop: Header=BB5_3 Depth=1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GCN-NEXT:    s_and_b64 s[6:7], exec, s[4:5]
 ; GCN-NEXT:    s_mov_b32 s9, s8
 ; GCN-NEXT:    s_mov_b32 s10, s8
 ; GCN-NEXT:    s_mov_b32 s11, s8
 ; GCN-NEXT:    v_mov_b32_e32 v0, s8
-; GCN-NEXT:    s_or_b64 s[12:13], s[6:7], s[12:13]
 ; GCN-NEXT:    v_mov_b32_e32 v1, s9
 ; GCN-NEXT:    v_mov_b32_e32 v2, s10
 ; GCN-NEXT:    v_mov_b32_e32 v3, s11

diff  --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index fdbd7ff8d652a..af4fcb4950b65 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -10,25 +10,26 @@ define void @needs_and(i32 %arg) {
 ; GCN-LABEL: needs_and:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s8, 1
+; GCN-NEXT:    s_mov_b32 s10, 1
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:    s_branch .LBB0_2
 ; GCN-NEXT:  .LBB0_1: ; %endif
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT:    s_add_i32 s8, s8, 1
+; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT:    s_and_b64 s[4:5], exec, vcc
+; GCN-NEXT:    s_or_b64 s[6:7], s[4:5], s[6:7]
+; GCN-NEXT:    s_add_i32 s10, s10, 1
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execz .LBB0_4
 ; GCN-NEXT:  .LBB0_2: ; %loop
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v0
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, s8, v0
-; GCN-NEXT:    s_or_b64 s[6:7], s[4:5], s[6:7]
-; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GCN-NEXT:    v_cmp_gt_u32_e64 s[4:5], s10, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
+; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
 ; GCN-NEXT:    s_cbranch_execz .LBB0_1
 ; GCN-NEXT:  ; %bb.3: ; %then
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    s_nop 0
+; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
 ; GCN-NEXT:    s_branch .LBB0_1
 ; GCN-NEXT:  .LBB0_4: ; %loopexit
@@ -107,13 +108,13 @@ define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
 ; GCN-NEXT:  .LBB2_1: ; %endif
 ; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
+; GCN-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
 ; GCN-NEXT:    s_add_i32 s10, s10, 1
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execz .LBB2_4
 ; GCN-NEXT:  .LBB2_2: ; %loop
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
-; GCN-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
 ; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, s10, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GCN-NEXT:    s_cbranch_execz .LBB2_1

diff  --git a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
index 7738a2daecc9e..2d8680ea030ce 100644
--- a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
@@ -13,12 +13,12 @@ define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 i
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; GCN-NEXT:    s_waitcnt_depctr 0xffe3
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; GCN-NEXT:    s_and_b32 s8, exec_lo, s6
+; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s7
 ; GCN-NEXT:    s_cbranch_execz .LBB0_5
 ; GCN-NEXT:  .LBB0_2: ; %bb
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    s_and_b32 s8, exec_lo, s6
-; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GCN-NEXT:    s_cbranch_execz .LBB0_1
 ; GCN-NEXT:  ; %bb.3: ; %bb1

diff  --git a/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir b/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir
new file mode 100644
index 0000000000000..8516cc9114c76
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir
@@ -0,0 +1,57 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z15 -O3 -run-pass=machine-sink %s -o - \
+# RUN:   -verify-machineinstrs | FileCheck %s
+#
+# Test that the AGHIK can be sunk into %bb.4. It has a def of CC, but it is dead.
+
+--- |
+  define void @fun() {  ret void  }
+...
+
+# CHECK-LABEL: bb.4:
+# CHECK:         %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
+# CHECK-NEXT:    CGHI %1, 0, implicit-def $cc
+# CHECK-NEXT:    BRC 14, 6, %bb.1, implicit $cc
+# CHECK-NEXT:    J %bb.5
+
+
+---
+name:            fun
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64bit }
+  - { id: 1, class: gr64bit }
+  - { id: 2, class: grx32bit }
+  - { id: 3, class: gr64bit }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+
+    %2:grx32bit = LHIMux 0
+    %3:gr64bit = IMPLICIT_DEF
+
+  bb.1:
+
+    %0:gr64bit = PHI %3, %bb.0, %1, %bb.4
+  
+  bb.2:
+  
+    %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
+    CHIMux %2, 0, implicit-def $cc
+    BRC 14, 6, %bb.4, implicit $cc
+    J %bb.3
+  
+  bb.3:
+  
+  bb.4:
+  
+    CGHI %1, 0, implicit-def $cc
+    BRC 14, 6, %bb.1, implicit $cc
+    J %bb.5
+  
+  bb.5:
+    Return
+
+...

diff  --git a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index 6f9d13cde6b2e..ca839bbb0dced 100644
--- a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -102,17 +102,17 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    jns .LBB0_20
 ; CHECK-NEXT:  .LBB0_5: # %a50b
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    shrl $31, %r9d
 ; CHECK-NEXT:    movl %eax, %r10d
 ; CHECK-NEXT:    orl %esi, %r10d
 ; CHECK-NEXT:    jns .LBB0_26
 ; CHECK-NEXT:  .LBB0_6: # %a57b
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    shrl $31, %r10d
+; CHECK-NEXT:    shrl $31, %r9d
 ; CHECK-NEXT:    testb %r9b, %r9b
 ; CHECK-NEXT:    je .LBB0_30
 ; CHECK-NEXT:  .LBB0_7: # %a66b
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    shrl $31, %r10d
 ; CHECK-NEXT:    testb %r10b, %r10b
 ; CHECK-NEXT:    jne .LBB0_8
 ; CHECK-NEXT:    .p2align 4, 0x90

diff  --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
index dead31a8ba013..68bdb9235546b 100644
--- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -196,10 +196,8 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    ja .LBB1_14
 ; CHECK-NEXT:  .LBB1_7: # %vector.body.preheader
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    leaq -4(%rcx), %r8
-; CHECK-NEXT:    movq %r8, %r11
-; CHECK-NEXT:    shrq $2, %r11
-; CHECK-NEXT:    btl $2, %r8d
+; CHECK-NEXT:    leaq -4(%rcx), %r11
+; CHECK-NEXT:    btl $2, %r11d
 ; CHECK-NEXT:    jb .LBB1_8
 ; CHECK-NEXT:  # %bb.9: # %vector.body.prol.preheader
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
@@ -208,12 +206,12 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    movdqu %xmm0, (%r13,%rbp,8)
 ; CHECK-NEXT:    movdqu %xmm0, 16(%r13,%rbp,8)
 ; CHECK-NEXT:    movl $4, %r10d
-; CHECK-NEXT:    testq %r11, %r11
+; CHECK-NEXT:    shrq $2, %r11
 ; CHECK-NEXT:    jne .LBB1_11
 ; CHECK-NEXT:    jmp .LBB1_13
 ; CHECK-NEXT:  .LBB1_8: # in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    xorl %r10d, %r10d
-; CHECK-NEXT:    testq %r11, %r11
+; CHECK-NEXT:    shrq $2, %r11
 ; CHECK-NEXT:    je .LBB1_13
 ; CHECK-NEXT:  .LBB1_11: # %vector.body.preheader.new
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1

diff  --git a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
index 9c881dd816994..2a98e3dd91ee1 100644
--- a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
+++ b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
@@ -60,10 +60,11 @@ zero:
 ; CHECK:        JMP_1 %bb.4
 ; CHECK:      bb.4
 ; CHECK:      bb.5
-; CHECK:        %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
-; CHECK-LV:     %3:gr64 = COPY killed %10
-; CHECK-LIS:    %3:gr64 = COPY %10
-; CHECK-LV:     TEST64rr killed %1, %1, implicit-def $eflags
+; CHECK:        %3:gr64 = COPY %10
+; CHECK-LV:     %4:gr64 = COPY killed %10
+; CHECK-LV:     %4:gr64 = nuw ADD64ri8 %4, 8, implicit-def dead $eflags
+; CHECK-LIS:    %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
+; CHECK:        TEST64rr killed %1, %1, implicit-def $eflags
 ; CHECK:        JCC_1 %bb.1, 5, implicit killed $eflags
 ; CHECK:        JMP_1 %bb.6
 define void @test2(i8 addrspace(1)* %this, i32 %0, i32 addrspace(1)* %p0, i8 addrspace(1)* %p1) gc "statepoint-example" personality i32* ()* @fake_personality_function {


        


More information about the llvm-commits mailing list