[llvm] [AMDGPU] Allow hoisting of V_CMP feeding into SI_IF and SI_IF_BREAK (PR #71401)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 6 06:31:18 PST 2023


https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/71401

The lowering of SI_IF and SI_IF_BREAK includes an AND with EXEC (unless
it is provably not required), so compares feeding into these pseudo ops
should be just as hoistable as compares feeding into explicit ANDs with
EXEC.
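
For context, SILowerControlFlow expands both pseudos with exactly such an AND. Roughly (an illustrative wave64 sketch, not the verbatim emitted MIR; register names are placeholders, and wave32 uses the _B32 forms):

  ; SI_IF %dst, %cond, %merge_bb becomes approximately:
  %saved  = COPY $exec                     ; save the incoming exec mask
  %masked = S_AND_B64 %saved, %cond        ; the AND with EXEC
  %dst    = S_XOR_B64 %masked, %saved      ; mask for the else path
  $exec   = S_MOV_B64_term %masked         ; enter the then block
  S_CBRANCH_EXECZ %merge_bb

  ; SI_IF_BREAK %dst, %cond, %break_mask becomes approximately:
  %masked = S_AND_B64 $exec, %cond         ; the AND with EXEC
  %dst    = S_OR_B64 %masked, %break_mask  ; accumulate lanes leaving the loop

Because the condition is re-ANDed with the live exec mask at the point of use, a V_CMP whose only users are these pseudo ops (as operand 1, the condition) can safely be hoisted across exec-mask changes, just as resultDependsOnExec already allows for S_AND_SAVEEXEC_B32/B64 and EXEC-reading S_AND_B32/B64 users.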


From 67724cb249bddc89b6182b3d29c58367dcade51c Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 6 Nov 2023 14:28:26 +0000
Subject: [PATCH] [AMDGPU] Allow hoisting of V_CMP feeding into SI_IF and
 SI_IF_BREAK

The lowering of SI_IF and SI_IF_BREAK includes an AND with EXEC (unless
it is provably not required), so compares feeding into these pseudo ops
should be just as hoistable as compares feeding into explicit ANDs with
EXEC.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 13 +++-
 .../test/CodeGen/AMDGPU/loop_exit_with_xor.ll | 22 ++++---
 .../si-unify-exit-return-unreachable.ll       | 60 +++++++++----------
 3 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d73328d67f6078e..d698f1a7a9d6d35 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -156,14 +156,21 @@ static bool resultDependsOnExec(const MachineInstr &MI) {
     Register DstReg = MI.getOperand(0).getReg();
     if (!DstReg.isVirtual())
       return true;
-    for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) {
-      switch (Use.getOpcode()) {
+    for (MachineOperand &Use : MRI.use_nodbg_operands(DstReg)) {
+      MachineInstr &User = *Use.getParent();
+      switch (User.getOpcode()) {
       case AMDGPU::S_AND_SAVEEXEC_B32:
       case AMDGPU::S_AND_SAVEEXEC_B64:
         break;
+      case AMDGPU::SI_IF:
+      case AMDGPU::SI_IF_BREAK:
+        if (Use.getOperandNo() != 1)
+          return true;
+        // These are lowered to an AND with EXEC.
+        break;
       case AMDGPU::S_AND_B32:
       case AMDGPU::S_AND_B64:
-        if (!Use.readsRegister(AMDGPU::EXEC))
+        if (!User.readsRegister(AMDGPU::EXEC))
           return true;
         break;
       default:
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index a407cd20bf76244..e32e372e14ed4dd 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -10,30 +10,28 @@ define void @needs_and(i32 %arg) {
 ; GCN-LABEL: needs_and:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, 1
-; GCN-NEXT:    s_mov_b64 s[6:7], 0
+; GCN-NEXT:    s_mov_b32 s8, 1
+; GCN-NEXT:    s_mov_b64 s[4:5], 0
 ; GCN-NEXT:    s_branch .LBB0_2
 ; GCN-NEXT:  .LBB0_1: ; %endif
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_and_b64 s[4:5], exec, vcc
-; GCN-NEXT:    s_or_b64 s[6:7], s[4:5], s[6:7]
-; GCN-NEXT:    s_add_i32 s10, s10, 1
-; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    s_add_i32 s8, s8, 1
+; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_execz .LBB0_4
 ; GCN-NEXT:  .LBB0_2: ; %loop
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_cmp_gt_u32_e64 s[4:5], s10, v0
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, s8, v0
+; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GCN-NEXT:    s_cbranch_execz .LBB0_1
 ; GCN-NEXT:  ; %bb.3: ; %then
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
 ; GCN-NEXT:    s_branch .LBB0_1
 ; GCN-NEXT:  .LBB0_4: ; %loopexit
-; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
index 1eef7b967f6d998..1e703cb757e21b4 100644
--- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
@@ -110,22 +110,22 @@ define void @my_func(i32 %0) {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0
 ; GCN-NEXT:    s_load_dword s10, s[4:5], 0x0
-; GCN-NEXT:    s_mov_b64 s[8:9], -1
+; GCN-NEXT:    s_mov_b64 s[6:7], -1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_lt_i32 s10, 1
-; GCN-NEXT:    s_mov_b64 s[6:7], 0
+; GCN-NEXT:    s_mov_b64 s[8:9], 0
 ; GCN-NEXT:    s_cbranch_scc1 .LBB0_7
 ; GCN-NEXT:  ; %bb.1: ; %LeafBlock1
 ; GCN-NEXT:    s_cmp_lg_u32 s10, 1
-; GCN-NEXT:    s_cselect_b64 s[6:7], -1, 0
+; GCN-NEXT:    s_cselect_b64 s[8:9], -1, 0
 ; GCN-NEXT:    s_mov_b64 vcc, exec
 ; GCN-NEXT:    s_cbranch_execz .LBB0_8
 ; GCN-NEXT:  .LBB0_2: ; %Flow11
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
+; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
 ; GCN-NEXT:  .LBB0_3: ; %do.body
 ; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
 ; GCN-NEXT:  .LBB0_4: ; %Flow17
-; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GCN-NEXT:  ; %bb.5: ; %UnifiedUnreachableBlock
 ; GCN-NEXT:    ; divergent unreachable
@@ -133,58 +133,58 @@ define void @my_func(i32 %0) {
 ; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .LBB0_7: ; %Flow
-; GCN-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_vccnz .LBB0_2
 ; GCN-NEXT:  .LBB0_8: ; %LeafBlock
 ; GCN-NEXT:    s_cmp_eq_u32 s10, 0
 ; GCN-NEXT:    s_cbranch_scc1 .LBB0_10
 ; GCN-NEXT:  ; %bb.9:
-; GCN-NEXT:    s_mov_b64 s[6:7], -1
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
+; GCN-NEXT:    s_mov_b64 s[8:9], -1
+; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
 ; GCN-NEXT:    s_cbranch_execnz .LBB0_3
 ; GCN-NEXT:    s_branch .LBB0_4
 ; GCN-NEXT:  .LBB0_10: ; %NodeBlock7
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
+; GCN-NEXT:    s_mov_b64 s[10:11], 0
 ; GCN-NEXT:    s_mov_b64 s[8:9], 0
-; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GCN-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
 ; GCN-NEXT:  ; %bb.11: ; %LeafBlock5
-; GCN-NEXT:    s_mov_b64 s[6:7], exec
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
-; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
+; GCN-NEXT:    s_mov_b64 s[8:9], exec
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 2, v0
+; GCN-NEXT:    s_and_b64 s[10:11], s[4:5], exec
+; GCN-NEXT:    ; implicit-def: $vgpr0
 ; GCN-NEXT:  ; %bb.12: ; %Flow13
-; GCN-NEXT:    s_andn2_saveexec_b64 s[10:11], s[4:5]
+; GCN-NEXT:    s_andn2_saveexec_b64 s[12:13], s[6:7]
 ; GCN-NEXT:  ; %bb.13: ; %LeafBlock3
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
-; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v0
 ; GCN-NEXT:    s_andn2_b64 s[8:9], s[8:9], exec
+; GCN-NEXT:    s_andn2_b64 s[10:11], s[10:11], exec
+; GCN-NEXT:    s_and_b64 s[6:7], s[6:7], exec
 ; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], exec
-; GCN-NEXT:    s_and_b64 s[12:13], vcc, exec
-; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[4:5]
-; GCN-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
+; GCN-NEXT:    s_or_b64 s[8:9], s[8:9], s[6:7]
+; GCN-NEXT:    s_or_b64 s[10:11], s[10:11], s[4:5]
 ; GCN-NEXT:  ; %bb.14: ; %Flow14
-; GCN-NEXT:    s_or_b64 exec, exec, s[10:11]
+; GCN-NEXT:    s_or_b64 exec, exec, s[12:13]
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0
-; GCN-NEXT:    s_and_saveexec_b64 s[10:11], s[8:9]
+; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[10:11]
 ; GCN-NEXT:    s_cbranch_execz .LBB0_18
 ; GCN-NEXT:  ; %bb.15: ; %LeafBlock9
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
-; GCN-NEXT:    s_mov_b64 s[8:9], -1
+; GCN-NEXT:    s_mov_b64 s[10:11], -1
 ; GCN-NEXT:    s_and_saveexec_b64 s[12:13], vcc
 ; GCN-NEXT:  ; %bb.16: ; %do.body.i.i.i.i
 ; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:    s_xor_b64 s[8:9], exec, -1
+; GCN-NEXT:    s_xor_b64 s[10:11], exec, -1
 ; GCN-NEXT:  ; %bb.17: ; %Flow16
 ; GCN-NEXT:    s_or_b64 exec, exec, s[12:13]
 ; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], exec
-; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
-; GCN-NEXT:    s_and_b64 s[8:9], s[8:9], exec
-; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
+; GCN-NEXT:    s_andn2_b64 s[8:9], s[8:9], exec
+; GCN-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GCN-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
 ; GCN-NEXT:  .LBB0_18: ; %Flow15
-; GCN-NEXT:    s_or_b64 exec, exec, s[10:11]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
+; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
 ; GCN-NEXT:    s_cbranch_execnz .LBB0_3
 ; GCN-NEXT:    s_branch .LBB0_4
 entry:


