[llvm] [AMDGPU] Fix undefined scc register in successor block of SI_KILL terminators (PR #134718)

via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 25 10:06:34 PDT 2025


https://github.com/mssefat updated https://github.com/llvm/llvm-project/pull/134718

>From b98e1634738c2c052c8969c997c9569534985eec Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Mon, 7 Apr 2025 12:15:37 -0400
Subject: [PATCH 1/3] [AMDGPU] Fix undefined $scc in successor blocks of
 SI_KILL terminators

Fixes #131298

Fix issue 131298 where an undefined $scc register causes verifier errors
when using SI_KILL_F32_COND_IMM_TERMINATOR instructions. The problem occurs
because the $scc register defined in a comparison before the kill terminator
is used in successor blocks, but was not properly marked as live-in.

This patch:
- Adds code to check if SCC is used in the successor block
- Adds SCC as a live-in to successor blocks
- Handles both explicit and implicit uses of SCC

With this patch, the machine verifier no longer reports undefined $scc errors
in the block following a kill terminator instruction.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  29 +++++
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll  | 151 ++++++++++++++++++++++
 2 files changed, 180 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 356040da95672..9a5bddd29f8da 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4516,6 +4516,35 @@ SITargetLowering::splitKillBlock(MachineInstr &MI,
   MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
   const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
   MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
+
+  // Check if SCC register is used in the successor block
+  bool IsSCCUsedInSuccessor = false;
+  for (const MachineInstr &SuccMI : *SplitBB) {
+    // Check for explicit uses of SCC in the instruction's operands
+    for (const MachineOperand &MO : SuccMI.operands()) {
+      if (MO.isReg() && MO.getReg() == AMDGPU::SCC && !MO.isDef()) {
+        IsSCCUsedInSuccessor = true;
+        break;
+      }
+    }
+
+    // Also check for implicit uses of SCC
+    const MCInstrDesc &Desc = SuccMI.getDesc();
+    if (Desc.hasImplicitUseOfPhysReg(AMDGPU::SCC)) {
+      IsSCCUsedInSuccessor = true;
+      break;
+    }
+    if (IsSCCUsedInSuccessor)
+      break;
+  }
+
+  // Only add SCC as implicit def and live-in if it's actually used in successor
+  if (IsSCCUsedInSuccessor) {
+    MI.addOperand(
+        MachineOperand::CreateReg(AMDGPU::SCC, true, true, false, false));
+    SplitBB->addLiveIn(AMDGPU::SCC);
+  }
+
   return SplitBB;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 7b512db84bd9e..8559361707fe3 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1956,6 +1956,157 @@ bb.1:
   ret void
 }
 
+define amdgpu_ps void @scc_use_after_kill_inst(float inreg %x, i32 inreg %y) #0 {
+; SI-LABEL: scc_use_after_kill_inst:
+; SI:       ; %bb.0: ; %bb
+; SI-NEXT:    v_add_f32_e64 v1, s0, 1.0
+; SI-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
+; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
+; SI-NEXT:    s_andn2_b64 exec, exec, vcc
+; SI-NEXT:    s_cbranch_scc0 .LBB17_6
+; SI-NEXT:  ; %bb.1: ; %bb
+; SI-NEXT:    s_andn2_b64 exec, exec, vcc
+; SI-NEXT:    s_cbranch_scc0 .LBB17_3
+; SI-NEXT:  ; %bb.2: ; %bb8
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_mov_b32_e32 v0, 8
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, 4.0
+; SI-NEXT:  .LBB17_3: ; %phibb
+; SI-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
+; SI-NEXT:    s_cbranch_vccz .LBB17_5
+; SI-NEXT:  ; %bb.4: ; %bb10
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_mov_b32_e32 v0, 9
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:  .LBB17_5: ; %end
+; SI-NEXT:    s_endpgm
+; SI-NEXT:  .LBB17_6:
+; SI-NEXT:    s_mov_b64 exec, 0
+; SI-NEXT:    exp null off, off, off, off done vm
+; SI-NEXT:    s_endpgm
+;
+; GFX10-WAVE64-LABEL: scc_use_after_kill_inst:
+; GFX10-WAVE64:       ; %bb.0: ; %bb
+; GFX10-WAVE64-NEXT:    v_add_f32_e64 v1, s0, 1.0
+; GFX10-WAVE64-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
+; GFX10-WAVE64-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
+; GFX10-WAVE64-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
+; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
+; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB17_6
+; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb
+; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
+; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB17_3
+; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb8
+; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v1, 8
+; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 4.0
+; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v1, off
+; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-WAVE64-NEXT:  .LBB17_3: ; %phibb
+; GFX10-WAVE64-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
+; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB17_5
+; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb10
+; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
+; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
+; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-WAVE64-NEXT:  .LBB17_5: ; %end
+; GFX10-WAVE64-NEXT:    s_endpgm
+; GFX10-WAVE64-NEXT:  .LBB17_6:
+; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
+; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
+; GFX10-WAVE64-NEXT:    s_endpgm
+;
+; GFX10-WAVE32-LABEL: scc_use_after_kill_inst:
+; GFX10-WAVE32:       ; %bb.0: ; %bb
+; GFX10-WAVE32-NEXT:    v_add_f32_e64 v1, s0, 1.0
+; GFX10-WAVE32-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0, v1
+; GFX10-WAVE32-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
+; GFX10-WAVE32-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0, v1
+; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
+; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB17_6
+; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb
+; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
+; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB17_3
+; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb8
+; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v1, 8
+; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 4.0
+; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v1, off
+; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-WAVE32-NEXT:  .LBB17_3: ; %phibb
+; GFX10-WAVE32-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v0
+; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB17_5
+; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb10
+; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
+; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
+; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-WAVE32-NEXT:  .LBB17_5: ; %end
+; GFX10-WAVE32-NEXT:    s_endpgm
+; GFX10-WAVE32-NEXT:  .LBB17_6:
+; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
+; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
+; GFX10-WAVE32-NEXT:    s_endpgm
+;
+; GFX11-LABEL: scc_use_after_kill_inst:
+; GFX11:       ; %bb.0: ; %bb
+; GFX11-NEXT:    v_add_f32_e64 v1, s0, 1.0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
+; GFX11-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
+; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
+; GFX11-NEXT:    s_cbranch_scc0 .LBB17_6
+; GFX11-NEXT:  ; %bb.1: ; %bb
+; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
+; GFX11-NEXT:    s_cbranch_scc0 .LBB17_3
+; GFX11-NEXT:  ; %bb.2: ; %bb8
+; GFX11-NEXT:    v_mov_b32_e32 v1, 8
+; GFX11-NEXT:    v_mov_b32_e32 v0, 4.0
+; GFX11-NEXT:    global_store_b32 v[0:1], v1, off dlc
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:  .LBB17_3: ; %phibb
+; GFX11-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
+; GFX11-NEXT:    s_cbranch_vccz .LBB17_5
+; GFX11-NEXT:  ; %bb.4: ; %bb10
+; GFX11-NEXT:    v_mov_b32_e32 v0, 9
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:  .LBB17_5: ; %end
+; GFX11-NEXT:    s_endpgm
+; GFX11-NEXT:  .LBB17_6:
+; GFX11-NEXT:    s_mov_b64 exec, 0
+; GFX11-NEXT:    exp mrt0 off, off, off, off done
+; GFX11-NEXT:    s_endpgm
+bb:
+  %tmp = fadd float %x, 1.000000e+00
+  %tmp1 = fcmp olt float 0.000000e+00, %tmp
+  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
+  %cmp.tmp2 = fcmp olt float %tmp2, 0.000000e+00
+  %uniform.cond = icmp eq i32 %y, 0
+  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
+  br i1 %uniform.cond, label %phibb, label %bb8
+
+phibb:                                            ; preds = %bb8, %bb
+  %tmp5 = phi float [ %tmp2, %bb ], [ 4.000000e+00, %bb8 ]
+  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
+  br i1 %tmp6, label %bb10, label %end
+
+bb8:                                              ; preds = %bb
+  store volatile i32 8, ptr addrspace(1) poison, align 4
+  br label %phibb
+
+bb10:                                             ; preds = %phibb
+  store volatile i32 9, ptr addrspace(1) poison, align 4
+  br label %end
+
+end:                                              ; preds = %bb10, %phibb
+  ret void
+}
+
 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
 declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

>From 2f04569a6073d9453c30936cbb123a93d2d10adb Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Mon, 7 Apr 2025 15:42:32 -0400
Subject: [PATCH 2/3] [AMDGPU] Fix undefined $scc in successor blocks of
 SI_KILL terminators

Fixes #131298

Fix issue 131298 where an undefined $scc register causes verifier errors when using SI_KILL_F32_COND_IMM_TERMINATOR instructions. The problem occurs because the $scc register defined in a comparison before the kill terminator is used in successor blocks, but was not properly marked as live-in.

This patch:
- Adds code to check if SCC is used in the successor block
- Adds SCC as a live-in to successor blocks
- Handles both explicit and implicit uses of SCC

With this patch, the machine verifier no longer reports undefined $scc errors in the block following a kill terminator instruction.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9a5bddd29f8da..c7620ee8c19db 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4520,7 +4520,7 @@ SITargetLowering::splitKillBlock(MachineInstr &MI,
   // Check if SCC register is used in the successor block
   bool IsSCCUsedInSuccessor = false;
   for (const MachineInstr &SuccMI : *SplitBB) {
-    // Check for explicit uses of SCC in the instruction's operands
+    // Check for uses of SCC in the instruction's operands
     for (const MachineOperand &MO : SuccMI.operands()) {
       if (MO.isReg() && MO.getReg() == AMDGPU::SCC && !MO.isDef()) {
         IsSCCUsedInSuccessor = true;
@@ -4529,16 +4529,19 @@ SITargetLowering::splitKillBlock(MachineInstr &MI,
     }
 
     // Also check for implicit uses of SCC
-    const MCInstrDesc &Desc = SuccMI.getDesc();
-    if (Desc.hasImplicitUseOfPhysReg(AMDGPU::SCC)) {
-      IsSCCUsedInSuccessor = true;
-      break;
+    if(!IsSCCUsedInSuccessor){
+      const MCInstrDesc &Desc = SuccMI.getDesc();
+      if (Desc.hasImplicitUseOfPhysReg(AMDGPU::SCC)) {
+        IsSCCUsedInSuccessor = true;
+        break;
+      }
     }
+    
     if (IsSCCUsedInSuccessor)
       break;
   }
 
-  // Only add SCC as implicit def and live-in if it's actually used in successor
+  // Add SCC as implicit def and live-in SCC if used in successor
   if (IsSCCUsedInSuccessor) {
     MI.addOperand(
         MachineOperand::CreateReg(AMDGPU::SCC, true, true, false, false));

>From b058b0e0b3a9178aaea7f33b5d683d1a4b56aaeb Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Fri, 25 Apr 2025 13:04:40 -0400
Subject: [PATCH 3/3] [AMDGPU] Fix undefined scc register in successor block of
 SI_KILL terminators

Fixes #131298

This patch addresses issue #131298, where the use of SI_KILL_F32_COND_IMM_TERMINATOR instructions leads to verifier errors due to an undefined $scc register. The error occurs when $scc, defined by a comparison instruction prior to the kill terminator, is used in successor blocks without being correctly marked as a live-in.

To fix this:

The call to finalizeLowering is moved to the beginning of FinalizeISel::runImpl to ensure reserved registers are frozen before live-ins are added.

In SITargetLowering::splitKillBlock, the splitAt function is now called with UpdateLiveIns = true to properly update live-in registers.

With these changes, the machine verifier no longer reports undefined $scc errors for kill terminator instructions.
---
 llvm/lib/CodeGen/FinalizeISel.cpp             |  5 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 34 +--------
 .../AMDGPU/finalize-isel-kill-scc-vcc.mir     | 73 +++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll      | 16 +++-
 4 files changed, 88 insertions(+), 40 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/finalize-isel-kill-scc-vcc.mir

diff --git a/llvm/lib/CodeGen/FinalizeISel.cpp b/llvm/lib/CodeGen/FinalizeISel.cpp
index 477512dc6b032..7069a8862b7df 100644
--- a/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -47,6 +47,8 @@ static std::pair<bool, bool> runImpl(MachineFunction &MF) {
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
 
+  TLI->finalizeLowering(MF);
+
   // Iterate through each instruction in the function, looking for pseudos.
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = &*I;
@@ -74,9 +76,6 @@ static std::pair<bool, bool> runImpl(MachineFunction &MF) {
       }
     }
   }
-
-  TLI->finalizeLowering(MF);
-
   return {Changed, PreserveCFG};
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c7620ee8c19db..f76d7ea9d4141 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4513,41 +4513,9 @@ Register SITargetLowering::getRegisterByName(const char *RegName, LLT VT,
 MachineBasicBlock *
 SITargetLowering::splitKillBlock(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
-  MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
+  MachineBasicBlock *SplitBB = BB->splitAt(MI, true /*UpdateLiveIns*/);
   const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
   MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
-
-  // Check if SCC register is used in the successor block
-  bool IsSCCUsedInSuccessor = false;
-  for (const MachineInstr &SuccMI : *SplitBB) {
-    // Check for uses of SCC in the instruction's operands
-    for (const MachineOperand &MO : SuccMI.operands()) {
-      if (MO.isReg() && MO.getReg() == AMDGPU::SCC && !MO.isDef()) {
-        IsSCCUsedInSuccessor = true;
-        break;
-      }
-    }
-
-    // Also check for implicit uses of SCC
-    if(!IsSCCUsedInSuccessor){
-      const MCInstrDesc &Desc = SuccMI.getDesc();
-      if (Desc.hasImplicitUseOfPhysReg(AMDGPU::SCC)) {
-        IsSCCUsedInSuccessor = true;
-        break;
-      }
-    }
-    
-    if (IsSCCUsedInSuccessor)
-      break;
-  }
-
-  // Add SCC as implicit def and live-in SCC if used in successor
-  if (IsSCCUsedInSuccessor) {
-    MI.addOperand(
-        MachineOperand::CreateReg(AMDGPU::SCC, true, true, false, false));
-    SplitBB->addLiveIn(AMDGPU::SCC);
-  }
-
   return SplitBB;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/finalize-isel-kill-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/finalize-isel-kill-scc-vcc.mir
new file mode 100644
index 0000000000000..c33ed34e8cb40
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/finalize-isel-kill-scc-vcc.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -run-pass finalize-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: phi_use_def_before_kill
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: phi_use_def_before_kill
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
+  ; CHECK-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, killed [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[S_MOV_B32_1]], 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 -1082130432
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_2]]
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[S_MOV_B32_1]], 0, [[COPY2]], killed [[V_CMP_GT_F32_e64_]], implicit $exec
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[V_CNDMASK_B32_e64_]]
+  ; CHECK-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   S_CMP_LG_U32 [[COPY]], killed [[S_MOV_B32_3]], implicit-def $scc
+  ; CHECK-NEXT:   SI_KILL_F32_COND_IMM_TERMINATOR [[V_ADD_F32_e64_]], 0, 2, implicit-def $vcc_lo, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $vcc_lo, $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, killed [[S_MOV_B32_4]], 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[V_CMP_EQ_F32_e64_1:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, killed [[S_MOV_B32_5]], 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    %3:sgpr_32 = COPY $sgpr1
+    %2:sgpr_32 = COPY $sgpr0
+    %5:sgpr_32 = S_MOV_B32 1065353216
+    %6:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %2:sgpr_32, 0, killed %5:sgpr_32, 0, 0, implicit $mode, implicit $exec
+    %7:sgpr_32 = S_MOV_B32 0
+    %8:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, %6:vgpr_32, 0, %7:sgpr_32, 0, implicit $mode, implicit $exec
+    %9:sgpr_32 = S_MOV_B32 -1082130432
+    %11:vgpr_32 = COPY killed %9:sgpr_32
+    %10:vgpr_32 = V_CNDMASK_B32_e64 0, %7:sgpr_32, 0, %11:vgpr_32, killed %8:sreg_32_xm0_xexec, implicit $exec
+    %0:sgpr_32 = COPY %10:vgpr_32
+    %12:sreg_32 = S_MOV_B32 0
+    S_CMP_LG_U32 %3:sgpr_32, killed %12:sreg_32, implicit-def $scc
+    SI_KILL_F32_COND_IMM_PSEUDO %6:vgpr_32, 0, 2, implicit-def $vcc, implicit $exec
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+    S_BRANCH %bb.2
+
+  bb.1:
+    %13:sgpr_32 = S_MOV_B32 0
+    %14:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, %3:sgpr_32, 0, killed %13:sgpr_32, 0, implicit $mode, implicit $exec
+    S_ENDPGM 0
+
+  bb.2:
+    %15:sgpr_32 = S_MOV_B32 0
+    %16:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, %3:sgpr_32, 0, killed %15:sgpr_32, 0, implicit $mode, implicit $exec
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 8559361707fe3..6fc92bce8242e 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1961,9 +1961,11 @@ define amdgpu_ps void @scc_use_after_kill_inst(float inreg %x, i32 inreg %y) #0
 ; SI:       ; %bb.0: ; %bb
 ; SI-NEXT:    v_add_f32_e64 v1, s0, 1.0
 ; SI-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
+; SI-NEXT:    s_mov_b64 s[2:3], exec
+; SI-NEXT:    s_cmp_lg_u32 s1, 0
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
 ; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
-; SI-NEXT:    s_andn2_b64 exec, exec, vcc
+; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
 ; SI-NEXT:    s_cbranch_scc0 .LBB17_6
 ; SI-NEXT:  ; %bb.1: ; %bb
 ; SI-NEXT:    s_andn2_b64 exec, exec, vcc
@@ -1994,10 +1996,12 @@ define amdgpu_ps void @scc_use_after_kill_inst(float inreg %x, i32 inreg %y) #0
 ; GFX10-WAVE64-LABEL: scc_use_after_kill_inst:
 ; GFX10-WAVE64:       ; %bb.0: ; %bb
 ; GFX10-WAVE64-NEXT:    v_add_f32_e64 v1, s0, 1.0
+; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
+; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10-WAVE64-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
 ; GFX10-WAVE64-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
 ; GFX10-WAVE64-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
-; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
+; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
 ; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB17_6
 ; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb
 ; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
@@ -2024,10 +2028,12 @@ define amdgpu_ps void @scc_use_after_kill_inst(float inreg %x, i32 inreg %y) #0
 ; GFX10-WAVE32-LABEL: scc_use_after_kill_inst:
 ; GFX10-WAVE32:       ; %bb.0: ; %bb
 ; GFX10-WAVE32-NEXT:    v_add_f32_e64 v1, s0, 1.0
+; GFX10-WAVE32-NEXT:    s_mov_b32 s2, exec_lo
+; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10-WAVE32-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0, v1
 ; GFX10-WAVE32-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
 ; GFX10-WAVE32-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0, v1
-; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
+; GFX10-WAVE32-NEXT:    s_andn2_b32 s2, s2, vcc_lo
 ; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB17_6
 ; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb
 ; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
@@ -2054,11 +2060,13 @@ define amdgpu_ps void @scc_use_after_kill_inst(float inreg %x, i32 inreg %y) #0
 ; GFX11-LABEL: scc_use_after_kill_inst:
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    v_add_f32_e64 v1, s0, 1.0
+; GFX11-NEXT:    s_mov_b64 s[2:3], exec
+; GFX11-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
 ; GFX11-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
-; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
+; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
 ; GFX11-NEXT:    s_cbranch_scc0 .LBB17_6
 ; GFX11-NEXT:  ; %bb.1: ; %bb
 ; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc



More information about the llvm-commits mailing list