[llvm] AMDGPU: Fix s_barrier_leave to write to scc (PR #161221)
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 29 08:56:32 PDT 2025
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/161221
>From 67af11294fcad019917ef87d0576f5b6b7cbf71c Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Mon, 29 Sep 2025 17:55:33 +0200
Subject: [PATCH] AMDGPU: Fix s_barrier_leave to write to scc
s_barrier_leave implicitly defines $scc and does not use the immediate
that represents the type of barrier, so the isel pattern ignores the
immediate operand of the LLVM intrinsic.
Add a test that checks that SIInsertWaitcnts tracks this scc write.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4 ++++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 ++
.../AMDGPU/AMDGPUInstructionSelector.cpp | 5 ++++
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 3 +++
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5 ++--
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 ++
llvm/lib/Target/AMDGPU/SOPInstructions.td | 6 ++---
.../AMDGPU/s-barrier-leave-insert-waitcnts.ll | 24 +++++++++++++++++++
llvm/test/CodeGen/AMDGPU/s-barrier.ll | 6 +++--
10 files changed, 50 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/s-barrier-leave-insert-waitcnts.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index bb4bf742fb861..a0cd1785c0130 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -13,6 +13,10 @@
include "AMDGPU.td"
include "AMDGPUCombine.td"
+def gi_ignore :
+ GIComplexOperandMatcher<s32, "selectIgnore">,
+ GIComplexPatternEquiv<Ignore>;
+
def sd_vsrc0 : ComplexPattern<i32, 1, "">;
def gi_vsrc0 :
GIComplexOperandMatcher<s32, "selectVSRC0">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2192a72bb27b7..bdf4cd3693b2a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -4312,6 +4312,8 @@ bool AMDGPUDAGToDAGISel::SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectIgnore(SDValue In) const { return true; }
+
SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
if (In.isUndef())
return CurDAG->getUNDEF(MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 4fa0d3f72e1c7..906548742f77f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -305,6 +305,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectWAVE_ADDRESS(SDNode *N);
void SelectSTACKRESTORE(SDNode *N);
+ bool SelectIgnore(SDValue In) const;
+
protected:
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 12915c7344426..7f08a4eef97c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4266,6 +4266,11 @@ Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
return Src;
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectIgnore(MachineOperand &Root) const {
+ return {{}};
+}
+
///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c760fe7ef99dd..5a575a9c66a8a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -166,6 +166,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR = false) const;
+ InstructionSelector::ComplexRendererFns
+ selectIgnore(MachineOperand &Root) const;
+
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 31a2d55e1baad..c2252afdbb064 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1006,9 +1006,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
- Opcode == AMDGPU::S_BARRIER_LEAVE ||
- Opcode == AMDGPU::S_BARRIER_LEAVE_IMM ||
- Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
+ Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT ||
+ Opcode == AMDGPU::DS_GWS_BARRIER;
}
static bool isF16PseudoScalarTrans(unsigned Opcode) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 18a53931a6390..e46bd45aed506 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1710,6 +1710,8 @@ def VOP3PMadMixBF16Mods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16Mods"
def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
+def Ignore : ComplexPattern<untyped, 0, "SelectIgnore">;
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 296ce5a46287c..6a39187e48cc8 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1616,7 +1616,8 @@ def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm
let isConvergent = 1;
}
-def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> {
+ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave",
+ (ins), "", [(int_amdgcn_s_barrier_leave (Ignore))] > {
let SchedRW = [WriteBarrier];
let simm16 = 0;
let fixed_imm = 1;
@@ -1624,9 +1625,6 @@ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> {
let Defs = [SCC];
}
-def S_BARRIER_LEAVE_IMM : SOPP_Pseudo <"s_barrier_leave",
- (ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_barrier_leave timm:$simm16)]>;
-
def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
let SubtargetPredicate = isGFX8Plus;
let simm16 = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier-leave-insert-waitcnts.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-leave-insert-waitcnts.ll
new file mode 100644
index 0000000000000..e9baad0d48388
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier-leave-insert-waitcnts.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -stop-after=si-insert-waitcnts < %s | FileCheck -check-prefix=GFX12 %s
+
+define amdgpu_ps void @func(i32 inreg %val, ptr addrspace(1) %out) {
+ ; GFX12-LABEL: name: func
+ ; GFX12: bb.0 (%ir-block.0):
+ ; GFX12-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_BARRIER_LEAVE implicit-def dead $scc
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: S_CMP_LG_U32 killed renamable $sgpr0, 0, implicit-def $scc
+ ; GFX12-NEXT: renamable $sgpr0 = S_MOVK_I32 123
+ ; GFX12-NEXT: renamable $sgpr0 = S_CSELECT_B32 killed renamable $sgpr0, 456, implicit killed $scc
+ ; GFX12-NEXT: $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX12-NEXT: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec :: (store (s32) into %ir.out, addrspace 1)
+ ; GFX12-NEXT: S_ENDPGM 0
+ call void @llvm.amdgcn.s.barrier.leave(i16 1)
+ %cmp = icmp ne i32 %val, 0
+ %ret = select i1 %cmp, i32 123, i32 456
+ store i32 %ret, ptr addrspace(1) %out
+ ret void
+}
+
+declare void @llvm.amdgcn.s.barrier.leave(i16)
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier.ll b/llvm/test/CodeGen/AMDGPU/s-barrier.ll
index 8a9beb73a6baa..efe90476e8b10 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier.ll
@@ -102,6 +102,7 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
; GFX12-SDAG-NEXT: s_barrier_wait 1
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_barrier_leave
; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
@@ -155,10 +156,11 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
; GFX12-GISEL-NEXT: s_barrier_signal -1
; GFX12-GISEL-NEXT: s_barrier_join m0
; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
; GFX12-GISEL-NEXT: s_barrier_wait 1
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_barrier_leave
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0
More information about the llvm-commits
mailing list