[llvm] AMDGPU: Fix s_barrier_leave to write to scc (PR #161221)
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 30 02:05:23 PDT 2025
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/161221
>From 0ba1dde4b359a102189e383f170d484dbc2253c6 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Tue, 30 Sep 2025 10:54:35 +0200
Subject: [PATCH] AMDGPU: Fix s_barrier_leave to write to scc
s_barrier_leave implicitly defines $scc
and does not use imm that represents type of barrier,
isel pattern ignores imm operand from llvm intrinsic.
Test if SIInsertWaitcnts tracks this scc write.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4 +++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 ++
.../AMDGPU/AMDGPUInstructionSelector.cpp | 5 ++++
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 3 ++
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5 ++--
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 ++
llvm/lib/Target/AMDGPU/SOPInstructions.td | 6 ++--
llvm/test/CodeGen/AMDGPU/s-barrier.ll | 29 ++++++++++++++++---
9 files changed, 47 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index bb4bf742fb861..a0cd1785c0130 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -13,6 +13,10 @@
include "AMDGPU.td"
include "AMDGPUCombine.td"
+def gi_ignore :
+ GIComplexOperandMatcher<s32, "selectIgnore">,
+ GIComplexPatternEquiv<Ignore>;
+
def sd_vsrc0 : ComplexPattern<i32, 1, "">;
def gi_vsrc0 :
GIComplexOperandMatcher<s32, "selectVSRC0">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2192a72bb27b7..bdf4cd3693b2a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -4312,6 +4312,8 @@ bool AMDGPUDAGToDAGISel::SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectIgnore(SDValue In) const { return true; }
+
SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
if (In.isUndef())
return CurDAG->getUNDEF(MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 4fa0d3f72e1c7..906548742f77f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -305,6 +305,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectWAVE_ADDRESS(SDNode *N);
void SelectSTACKRESTORE(SDNode *N);
+ bool SelectIgnore(SDValue In) const;
+
protected:
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 12915c7344426..7f08a4eef97c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4266,6 +4266,11 @@ Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
return Src;
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectIgnore(MachineOperand &Root) const {
+ return {{}};
+}
+
///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c760fe7ef99dd..5a575a9c66a8a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -166,6 +166,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR = false) const;
+ InstructionSelector::ComplexRendererFns
+ selectIgnore(MachineOperand &Root) const;
+
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 31a2d55e1baad..c2252afdbb064 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1006,9 +1006,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
- Opcode == AMDGPU::S_BARRIER_LEAVE ||
- Opcode == AMDGPU::S_BARRIER_LEAVE_IMM ||
- Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
+ Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT ||
+ Opcode == AMDGPU::DS_GWS_BARRIER;
}
static bool isF16PseudoScalarTrans(unsigned Opcode) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 18a53931a6390..e46bd45aed506 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1710,6 +1710,8 @@ def VOP3PMadMixBF16Mods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16Mods"
def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
+def Ignore : ComplexPattern<untyped, 0, "SelectIgnore">;
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 296ce5a46287c..6a39187e48cc8 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1616,7 +1616,8 @@ def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm
let isConvergent = 1;
}
-def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> {
+ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave",
+ (ins), "", [(int_amdgcn_s_barrier_leave (Ignore))] > {
let SchedRW = [WriteBarrier];
let simm16 = 0;
let fixed_imm = 1;
@@ -1624,9 +1625,6 @@ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> {
let Defs = [SCC];
}
-def S_BARRIER_LEAVE_IMM : SOPP_Pseudo <"s_barrier_leave",
- (ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_barrier_leave timm:$simm16)]>;
-
def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
let SubtargetPredicate = isGFX8Plus;
let simm16 = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier.ll b/llvm/test/CodeGen/AMDGPU/s-barrier.ll
index 8a9beb73a6baa..4c7cef9cc1a0f 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
@bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@@ -102,6 +102,7 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
; GFX12-SDAG-NEXT: s_barrier_wait 1
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_barrier_leave
; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
@@ -155,10 +156,11 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
; GFX12-GISEL-NEXT: s_barrier_signal -1
; GFX12-GISEL-NEXT: s_barrier_join m0
; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
; GFX12-GISEL-NEXT: s_barrier_wait 1
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_barrier_leave
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0
@@ -256,6 +258,25 @@ define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in)
ret void
}
+define amdgpu_ps void @test_barrier_leave_write_to_scc(i32 inreg %val, ptr addrspace(1) %out) {
+; GFX12-LABEL: test_barrier_leave_write_to_scc:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_barrier_leave
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_lg_u32 s0, 0
+; GFX12-NEXT: s_movk_i32 s0, 0x7b
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: s_cselect_b32 s0, s0, 0x1c8
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.barrier.leave(i16 1)
+ %cmp = icmp ne i32 %val, 0
+ %ret = select i1 %cmp, i32 123, i32 456
+ store i32 %ret, ptr addrspace(1) %out
+ ret void
+}
+
declare void @llvm.amdgcn.s.barrier() #1
declare void @llvm.amdgcn.s.barrier.wait(i16) #1
declare void @llvm.amdgcn.s.barrier.signal(i32) #1
More information about the llvm-commits
mailing list