[llvm] AMDGPU/GFX12: Fix s_barrier_signal_isfirst for single-wave workgroups (PR #143634)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 10 16:57:10 PDT 2025


Nicolai =?utf-8?q?Hähnle?= <nicolai.haehnle at amd.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/143634 at github.com>


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Nicolai Hähnle (nhaehnle)

<details>
<summary>Changes</summary>

Barrier instructions are no-ops in single-wave workgroups. This includes s_barrier_signal_isfirst, which will leave SCC unmodified.

Model this correctly (via an implicit use of SCC) and ensure SCC==1 before the barrier instruction (if the wave is the only one of the workgroup, then it is the first).

---
Full diff: https://github.com/llvm/llvm-project/pull/143634.diff


5 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+8) 
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir (+7-3) 
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll (+41) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 7e72f6ca478fd..672520390c8bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5918,6 +5918,9 @@ bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
   const DebugLoc &DL = I.getDebugLoc();
   Register CCReg = I.getOperand(0).getReg();
 
+  // Set SCC to true, in case the barrier instruction gets converted to a NOP.
+  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_CMP_EQ_U32)).addImm(0).addImm(0);
+
   BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
       .addImm(I.getOperand(2).getImm());
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 53dc540cbd635..341d0b98797f5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5411,6 +5411,14 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     MI.eraseFromParent();
     return BB;
   }
+  case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM: {
+    // Set SCC to true, in case the barrier instruction gets converted to a NOP.
+    BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
+            TII->get(AMDGPU::S_CMP_EQ_U32))
+        .addImm(0)
+        .addImm(0);
+    return BB;
+  }
   case AMDGPU::GET_GROUPSTATICSIZE: {
     assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
            getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e0a36758534d5..90e65a6950c0a 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -472,6 +472,7 @@ def S_BARRIER_SIGNAL_M0 : SOP1_Pseudo <"s_barrier_signal m0", (outs), (ins),
 def S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_Pseudo <"s_barrier_signal_isfirst m0", (outs), (ins),
   "", []>{
   let Defs = [SCC];
+  let Uses = [M0, SCC];
   let SchedRW = [WriteBarrier];
   let isConvergent = 1;
 }
@@ -487,6 +488,8 @@ def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
 def S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Pseudo <"s_barrier_signal_isfirst", (outs),
   (ins SplitBarrier:$src0), "$src0", [(set SCC, (int_amdgcn_s_barrier_signal_isfirst timm:$src0))]>{
   let Defs = [SCC];
+  let Uses = [SCC];
+  let usesCustomInserter = 1;
   let SchedRW = [WriteBarrier];
   let isConvergent = 1;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
index e4b16a3fa0040..f437dee253d00 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
@@ -374,7 +374,8 @@ body: |
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   V_NOP_e32 implicit $exec
-  ; CHECK-NEXT:   S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+  ; CHECK-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   S_ENDPGM 0
@@ -385,7 +386,8 @@ body: |
   bb.1:
     successors: %bb.2
     V_NOP_e32 implicit $exec
-    S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+    S_CMP_EQ_U32 0, 0, implicit-def $scc
+    S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit $scc
 
   bb.2:
     S_ENDPGM 0
@@ -437,6 +439,7 @@ body: |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   V_NOP_e32 implicit $exec
   ; CHECK-NEXT:   $m0 = S_MOV_B32 -1
+  ; CHECK-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; CHECK-NEXT:   S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
@@ -449,7 +452,8 @@ body: |
     successors: %bb.2
     V_NOP_e32 implicit $exec
     $m0 = S_MOV_B32 -1
-    S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+    S_CMP_EQ_U32 0, 0, implicit-def $scc
+    S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc, implicit $scc
 
   bb.2:
     S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
new file mode 100644
index 0000000000000..a5bc15e4c59e1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
+
+define i1 @func1() {
+; GFX12-SDAG-LABEL: func1:
+; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT:    s_cmp_eq_u32 0, 0
+; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
+; GFX12-SDAG-NEXT:    s_barrier_signal_isfirst -1
+; GFX12-SDAG-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
+; GFX12-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: func1:
+; GFX12-GISEL:       ; %bb.0:
+; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT:    s_cmp_eq_u32 0, 0
+; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
+; GFX12-GISEL-NEXT:    s_barrier_signal_isfirst -1
+; GFX12-GISEL-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
+; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
+    %r = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
+    ret i1 %r
+}
+
+declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32)

``````````

</details>


https://github.com/llvm/llvm-project/pull/143634


More information about the llvm-commits mailing list