[llvm] AMDGPU/GFX12: Fix s_barrier_signal_isfirst for single-wave workgroups (PR #143634)
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 17:07:22 PDT 2025
https://github.com/nhaehnle updated https://github.com/llvm/llvm-project/pull/143634
>From 2676f404cd3600d8df716437960900e89240b0ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle at amd.com>
Date: Tue, 10 Jun 2025 16:50:05 -0700
Subject: [PATCH 1/4] AMDGPU: Precommit a lit test
---
.../llvm.amdgcn.s.barrier.signal.isfirst.ll | 39 +++++++++++++++++++
1 file changed, 39 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
new file mode 100644
index 0000000000000..8c1260716f357
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
+
+define i1 @func1() {
+; GFX12-SDAG-LABEL: func1:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
+; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
+; GFX12-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
+; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: func1:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
+; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
+; GFX12-GISEL-NEXT: s_cselect_b32 s0, 1, 0
+; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %r = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
+ ret i1 %r
+}
+
+declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32)
>From 28ea0466e8243625c476725f5e6bf62b4ec06dcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle at amd.com>
Date: Tue, 10 Jun 2025 15:48:09 -0700
Subject: [PATCH 2/4] AMDGPU/GFX12: Fix s_barrier_signal_isfirst for
single-wave workgroups
Barrier instructions are no-ops in single-wave workgroups. This includes
s_barrier_signal_isfirst, which in that case leaves SCC unmodified.
Model this correctly (via an implicit use of SCC) and ensure SCC==1
before the barrier instruction: if the wave is the only one in the
workgroup, then it is necessarily the first.
---
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 3 +++
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 ++++++++
llvm/lib/Target/AMDGPU/SOPInstructions.td | 3 +++
llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir | 10 +++++++---
.../AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll | 2 ++
5 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 7e72f6ca478fd..672520390c8bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5918,6 +5918,9 @@ bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
const DebugLoc &DL = I.getDebugLoc();
Register CCReg = I.getOperand(0).getReg();
+ // Set SCC to true, in case the barrier instruction gets converted to a NOP.
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_CMP_EQ_U32)).addImm(0).addImm(0);
+
BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
.addImm(I.getOperand(2).getImm());
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 53dc540cbd635..341d0b98797f5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5411,6 +5411,14 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM: {
+ // Set SCC to true, in case the barrier instruction gets converted to a NOP.
+ BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::S_CMP_EQ_U32))
+ .addImm(0)
+ .addImm(0);
+ return BB;
+ }
case AMDGPU::GET_GROUPSTATICSIZE: {
assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e0a36758534d5..90e65a6950c0a 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -472,6 +472,7 @@ def S_BARRIER_SIGNAL_M0 : SOP1_Pseudo <"s_barrier_signal m0", (outs), (ins),
def S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_Pseudo <"s_barrier_signal_isfirst m0", (outs), (ins),
"", []>{
let Defs = [SCC];
+ let Uses = [M0, SCC];
let SchedRW = [WriteBarrier];
let isConvergent = 1;
}
@@ -487,6 +488,8 @@ def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
def S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Pseudo <"s_barrier_signal_isfirst", (outs),
(ins SplitBarrier:$src0), "$src0", [(set SCC, (int_amdgcn_s_barrier_signal_isfirst timm:$src0))]>{
let Defs = [SCC];
+ let Uses = [SCC];
+ let usesCustomInserter = 1;
let SchedRW = [WriteBarrier];
let isConvergent = 1;
}
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
index e4b16a3fa0040..f437dee253d00 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
@@ -374,7 +374,8 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: V_NOP_e32 implicit $exec
- ; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+ ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
@@ -385,7 +386,8 @@ body: |
bb.1:
successors: %bb.2
V_NOP_e32 implicit $exec
- S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+ S_CMP_EQ_U32 0, 0, implicit-def $scc
+ S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit $scc
bb.2:
S_ENDPGM 0
@@ -437,6 +439,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: V_NOP_e32 implicit $exec
; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
@@ -449,7 +452,8 @@ body: |
successors: %bb.2
V_NOP_e32 implicit $exec
$m0 = S_MOV_B32 -1
- S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+ S_CMP_EQ_U32 0, 0, implicit-def $scc
+ S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc, implicit $scc
bb.2:
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
index 8c1260716f357..a5bc15e4c59e1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
@@ -10,6 +10,7 @@ define i1 @func1() {
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: s_cmp_eq_u32 0, 0
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
; GFX12-SDAG-NEXT: s_cselect_b32 s0, -1, 0
@@ -25,6 +26,7 @@ define i1 @func1() {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: s_cmp_eq_u32 0, 0
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
; GFX12-GISEL-NEXT: s_cselect_b32 s0, 1, 0
>From 12fad596ab36f2781147e33d1b650acead3461d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle at gmail.com>
Date: Tue, 10 Jun 2025 17:07:07 -0700
Subject: [PATCH 3/4] Update
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
index a5bc15e4c59e1..98a078da4efbf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
define i1 @func1() {
; GFX12-SDAG-LABEL: func1:
>From 2b7f94248e210616749b9aeb0fa7c47f8b939651 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle at gmail.com>
Date: Tue, 10 Jun 2025 17:07:14 -0700
Subject: [PATCH 4/4] Update
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
index 98a078da4efbf..651d204f65b6c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll
@@ -34,8 +34,8 @@ define i1 @func1() {
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %r = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
- ret i1 %r
+ %r = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
+ ret i1 %r
}
declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32)
More information about the llvm-commits mailing list