[clang] [llvm] [AMDGPU] Add s_wakeup_barrier instruction for gfx1250 (PR #170501)
Mirko BrkuĊĦanin via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 9 09:17:59 PST 2025
https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/170501
>From db0984038b12f69ed458099432720019a5f70b24 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 3 Dec 2025 16:57:48 +0100
Subject: [PATCH 1/3] [AMDGPU] Add s_wakeup_barrier instruction for gfx1250
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 +
.../CodeGenOpenCL/builtins-amdgcn-gfx1250.cl | 15 +++++++
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 6 +++
.../AMDGPU/AMDGPUInstructionSelector.cpp | 14 +++++++
llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp | 1 +
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 29 +++++++++++--
llvm/lib/Target/AMDGPU/SOPInstructions.td | 14 +++++++
.../amdgpu-lower-exec-sync-and-module-lds.ll | 3 +-
.../CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll | 1 -
llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll | 41 +++++++++++++++++++
llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s | 12 ++++++
.../Disassembler/AMDGPU/gfx1250_dasm_sop1.txt | 9 ++++
13 files changed, 141 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 0b78b460c0d6a..fceb4094f8ab4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -749,6 +749,7 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8bf16, "V8yV8y*3", "nc", "gfx
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index e4a5fe9014c2e..b32bcdd408512 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -1489,6 +1489,21 @@ void test_s_cluster_barrier()
__builtin_amdgcn_s_cluster_barrier();
}
+// CHECK-LABEL: @test_s_wakeup_barrier(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]])
+// CHECK-NEXT: ret void
+//
+void test_s_wakeup_barrier(void *bar)
+{
+ __builtin_amdgcn_s_wakeup_barrier(bar);
+}
+
// CHECK-LABEL: @test_global_add_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 03488f8389aa2..55ada1b08767f 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -314,6 +314,12 @@ def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;
+// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier)
+// The %barrier argument must be uniform, otherwise behavior is undefined.
+def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">,
+ Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+ IntrNoCallback, IntrNoFree]>;
+
// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 15492144ba615..b87313a196fa3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2392,6 +2392,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_s_barrier_init:
case Intrinsic::amdgcn_s_barrier_signal_var:
return selectNamedBarrierInit(I, IntrinsicID);
+ case Intrinsic::amdgcn_s_wakeup_barrier: {
+ if (!AMDGPU::isGFX1250(STI)) {
+ Function &F = I.getMF()->getFunction();
+ F.getContext().diagnose(
+ DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
+ I.getDebugLoc(), DS_Error));
+ return false;
+ }
+ return selectNamedBarrierInst(I, IntrinsicID);
+ }
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_get_named_barrier_state:
return selectNamedBarrierInst(I, IntrinsicID);
@@ -6832,6 +6842,8 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
llvm_unreachable("not a named barrier op");
case Intrinsic::amdgcn_s_barrier_join:
return AMDGPU::S_BARRIER_JOIN_IMM;
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ return AMDGPU::S_WAKEUP_BARRIER_IMM;
case Intrinsic::amdgcn_s_get_named_barrier_state:
return AMDGPU::S_GET_BARRIER_STATE_IMM;
};
@@ -6841,6 +6853,8 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
llvm_unreachable("not a named barrier op");
case Intrinsic::amdgcn_s_barrier_join:
return AMDGPU::S_BARRIER_JOIN_M0;
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ return AMDGPU::S_WAKEUP_BARRIER_M0;
case Intrinsic::amdgcn_s_get_named_barrier_state:
return AMDGPU::S_GET_BARRIER_STATE_M0;
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
index 8145816405915..4e16c13e30e91 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
@@ -371,6 +371,7 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
case Intrinsic::amdgcn_s_barrier_wait:
case Intrinsic::amdgcn_s_barrier_leave:
case Intrinsic::amdgcn_s_get_barrier_state:
+ case Intrinsic::amdgcn_s_wakeup_barrier:
case Intrinsic::amdgcn_wave_barrier:
case Intrinsic::amdgcn_sched_barrier:
case Intrinsic::amdgcn_sched_group_barrier:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index a7955ee2dac40..c0e0977399710 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3347,6 +3347,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(B, MI, 1);
return;
case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_wakeup_barrier:
constrainOpWithReadfirstlane(B, MI, 1);
return;
case Intrinsic::amdgcn_s_barrier_init:
@@ -5583,6 +5584,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
break;
case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_wakeup_barrier:
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
break;
case Intrinsic::amdgcn_s_barrier_init:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a871d978dfbc8..84439cfad740c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11527,6 +11527,11 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
auto *NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
return SDValue(NewMI, 0);
}
+ case Intrinsic::amdgcn_s_wakeup_barrier: {
+ if (!AMDGPU::isGFX1250(*Subtarget))
+ return SDValue();
+ [[fallthrough]];
+ }
case Intrinsic::amdgcn_s_barrier_join: {
// these three intrinsics have one operand: barrier pointer
SDValue Chain = Op->getOperand(0);
@@ -11536,16 +11541,32 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
if (isa<ConstantSDNode>(BarOp)) {
uint64_t BarVal = cast<ConstantSDNode>(BarOp)->getZExtValue();
- Opc = AMDGPU::S_BARRIER_JOIN_IMM;
-
+ switch (IntrinsicID) {
+ default:
+ return SDValue();
+ case Intrinsic::amdgcn_s_barrier_join:
+ Opc = AMDGPU::S_BARRIER_JOIN_IMM;
+ break;
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ Opc = AMDGPU::S_WAKEUP_BARRIER_IMM;
+ break;
+ }
// extract the BarrierID from bits 4-9 of the immediate
unsigned BarID = (BarVal >> 4) & 0x3F;
SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32);
Ops.push_back(K);
Ops.push_back(Chain);
} else {
- Opc = AMDGPU::S_BARRIER_JOIN_M0;
-
+ switch (IntrinsicID) {
+ default:
+ return SDValue();
+ case Intrinsic::amdgcn_s_barrier_join:
+ Opc = AMDGPU::S_BARRIER_JOIN_M0;
+ break;
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ Opc = AMDGPU::S_WAKEUP_BARRIER_M0;
+ break;
+ }
// extract the BarrierID from bits 4-9 of BarOp, copy to M0[5:0]
SDValue M0Val;
M0Val = DAG.getNode(ISD::SRL, DL, MVT::i32, BarOp,
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 1931e0be15152..8f92dfb957b1a 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -504,6 +504,12 @@ def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins),
let isConvergent = 1;
}
+def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins),
+ "", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+ let SubtargetPredicate = isGFX1250Plus;
+}
} // End Uses = [M0]
def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
@@ -527,6 +533,12 @@ def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs),
let isConvergent = 1;
}
+def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs),
+ (ins SplitBarrier:$src0), "$src0", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+ let SubtargetPredicate = isGFX1250Plus;
+}
} // End has_sdst = 0
def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32:$sdst),
@@ -2226,6 +2238,8 @@ defm S_SLEEP_VAR : SOP1_IMM_Real_gfx12<0x058>;
// GFX1250
defm S_GET_SHADER_CYCLES_U64 : SOP1_Real_gfx12<0x06>;
defm S_ADD_PC_I64 : SOP1_Real_gfx12<0x04b>;
+defm S_WAKEUP_BARRIER_M0 : SOP1_M0_Real_gfx12<0x057>;
+defm S_WAKEUP_BARRIER_IMM : SOP1_IMM_Real_gfx12<0x057>;
//===----------------------------------------------------------------------===//
// SOP1 - GFX1150, GFX12
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll
index bed8fa20a5044..215fb06106e11 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll
@@ -112,8 +112,7 @@ attributes #2 = { nounwind readnone }
; CHECK: attributes #[[ATTR0]] = { nounwind }
; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-lds-size"="1" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
;.
; CHECK: [[META0]] = !{i32 8396816, i32 8396817}
; CHECK: [[META1]] = !{i32 8396912, i32 8396913}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll
index bde6db6463cb1..74e6d83ed2d94 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll
@@ -96,7 +96,6 @@ attributes #2 = { nounwind readnone }
;.
; CHECK: attributes #[[ATTR0]] = { nounwind }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind }
;.
; CHECK: [[META0]] = !{i32 8396816, i32 8396817}
; CHECK: [[META1]] = !{i32 8396912, i32 8396913}
diff --git a/llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll b/llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll
new file mode 100644
index 0000000000000..b92d38cd857f2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1250-GISEL %s
+
+ at bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
+
+define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+; GFX1250-SDAG-LABEL: kernel1:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x2c
+; GFX1250-SDAG-NEXT: s_mov_b32 m0, 1
+; GFX1250-SDAG-NEXT: s_wakeup_barrier m0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_lshr_b32 s0, s0, 4
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_and_b32 m0, s0, 63
+; GFX1250-SDAG-NEXT: s_wakeup_barrier m0
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: kernel1:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
+; GFX1250-GISEL-NEXT: s_wakeup_barrier 1
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_lshr_b32 s0, s0, 4
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_and_b32 m0, s0, 63
+; GFX1250-GISEL-NEXT: s_wakeup_barrier m0
+; GFX1250-GISEL-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) @bar)
+ call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %in)
+ ret void
+}
+
+
+declare void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3)) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
index cc351afd49f04..68cfdc4c01178 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
@@ -70,3 +70,15 @@ s_get_barrier_state s3, -4
s_get_barrier_state s3, m0
// GFX1250: s_get_barrier_state s3, m0 ; encoding: [0x7d,0x50,0x83,0xbe]
+
+s_wakeup_barrier 1
+// GFX1250: s_wakeup_barrier 1 ; encoding: [0x81,0x57,0x80,0xbe]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+s_wakeup_barrier -1
+// GFX1250: s_wakeup_barrier -1 ; encoding: [0xc1,0x57,0x80,0xbe]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+s_wakeup_barrier m0
+// GFX1250: s_wakeup_barrier m0 ; encoding: [0x7d,0x57,0x80,0xbe]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
index 34a46467c6839..1490914a5f61f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
@@ -33,3 +33,12 @@
0x7d,0x50,0x83,0xbe
# GFX1250: s_get_barrier_state s3, m0 ; encoding: [0x7d,0x50,0x83,0xbe]
+
+0x81,0x57,0x80,0xbe
+# GFX1250: s_wakeup_barrier 1 ; encoding: [0x81,0x57,0x80,0xbe]
+
+0xc1,0x57,0x80,0xbe
+# GFX1250: s_wakeup_barrier -1 ; encoding: [0xc1,0x57,0x80,0xbe]
+
+0x7d,0x57,0x80,0xbe
+# GFX1250: s_wakeup_barrier m0 ; encoding: [0x7d,0x57,0x80,0xbe]
>From 97df8cde81e5449af09ebcb60f3e16d84f9e53db Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 3 Dec 2025 17:37:41 +0100
Subject: [PATCH 2/3] Added subtarget feature: WakeupBarrier
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 4 ++--
clang/test/CodeGenOpenCL/amdgpu-features.cl | 4 ++--
llvm/lib/Target/AMDGPU/AMDGPU.td | 10 ++++++++++
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++++
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 ++--
llvm/lib/TargetParser/TargetParser.cpp | 1 +
9 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index fceb4094f8ab4..b0f9cf2c551fe 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -749,7 +749,7 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8bf16, "V8yV8y*3", "nc", "gfx
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "wakeup-barrier-inst")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
index c5656c49c4761..f6312be342ca9 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
@@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; }
// CHECK-NEXT: ret void
//
//.
-// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
-// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
+// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wakeup-barrier-inst,+wavefrontsize32" "uniform-work-group-size"="false" }
+// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wakeup-barrier-inst,+wavefrontsize32" }
// CHECK: attributes #[[ATTR2]] = { convergent nounwind }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index bd162b40b8e47..96f4cdfc8d1ca 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -109,8 +109,8 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
-// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wakeup-barrier-inst,+wavefrontsize32"
+// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wakeup-barrier-inst,+wavefrontsize32"
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize64"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 3b14a82bbbb04..673a88c79677e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1276,6 +1276,12 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
"Has s_setprio_inc_wg instruction."
>;
+def FeatureWakeupBarrier : SubtargetFeature<"wakeup-barrier-inst",
+ "HasWakeupBarrier",
+ "true",
+ "Has s_wakeup_barrier instruction."
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -2200,6 +2206,7 @@ def FeatureISAVersion12_50_Common : FeatureSet<
FeaturePkAddMinMaxInsts,
FeatureLdsBarrierArriveAtomic,
FeatureSetPrioIncWgInst,
+ FeatureWakeupBarrier,
Feature45BitNumRecordsBufferResource,
FeatureSupportsXNACK,
FeatureXNACK,
@@ -3076,6 +3083,9 @@ def HasLdsBarrierArriveAtomic : Predicate<"Subtarget->hasLdsBarrierArriveAtomic(
def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
AssemblerPredicate<(all_of FeatureSetPrioIncWgInst)>;
+def HasWakeupBarrier : Predicate<"Subtarget->hasWakeupBarrier()">,
+ AssemblerPredicate<(all_of FeatureWakeupBarrier)>;
+
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
AssemblerPredicate<(all_of FeatureRequiresAlignedVGPRs)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b87313a196fa3..d7c81d22a2fb0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2393,7 +2393,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_s_barrier_signal_var:
return selectNamedBarrierInit(I, IntrinsicID);
case Intrinsic::amdgcn_s_wakeup_barrier: {
- if (!AMDGPU::isGFX1250(STI)) {
+ if (!STI.hasWakeupBarrier()) {
Function &F = I.getMF()->getFunction();
F.getContext().diagnose(
DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ddff3ad0244f9..6b54699b7009e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -289,6 +289,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasPointSampleAccel = false;
bool HasLdsBarrierArriveAtomic = false;
bool HasSetPrioIncWgInst = false;
+ bool HasWakeupBarrier = false;
bool RequiresCOV6 = false;
bool UseBlockVGPROpsForCSR = false;
@@ -1612,6 +1613,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true if target has S_SETPRIO_INC_WG instruction.
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
+ // \returns true if target has S_WAKEUP_BARRIER instruction.
+ bool hasWakeupBarrier() const { return HasWakeupBarrier; }
+
// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
// of sign-extending. Note that GFX1250 has not only fixed the bug but also
// extended VA to 57 bits.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 84439cfad740c..b9a0e96ae12a8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11528,7 +11528,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue(NewMI, 0);
}
case Intrinsic::amdgcn_s_wakeup_barrier: {
- if (!AMDGPU::isGFX1250(*Subtarget))
+ if (!Subtarget->hasWakeupBarrier())
return SDValue();
[[fallthrough]];
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 8f92dfb957b1a..a0bd298313b36 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -508,7 +508,7 @@ def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins),
"", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
- let SubtargetPredicate = isGFX1250Plus;
+ let SubtargetPredicate = HasWakeupBarrier;
}
} // End Uses = [M0]
@@ -537,7 +537,7 @@ def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs),
(ins SplitBarrier:$src0), "$src0", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
- let SubtargetPredicate = isGFX1250Plus;
+ let SubtargetPredicate = HasWakeupBarrier;
}
} // End has_sdst = 0
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 28f3649a840d6..75fa121839a7a 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -443,6 +443,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["atomic-global-pk-add-bf16-inst"] = true;
Features["atomic-ds-pk-add-16-insts"] = true;
Features["setprio-inc-wg-inst"] = true;
+ Features["wakeup-barrier-inst"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
Features["atomic-fmin-fmax-global-f64"] = true;
Features["wavefrontsize32"] = true;
>From 024c788f9aadc10ca1bd697735c59fd82f50a46d Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Tue, 9 Dec 2025 18:17:07 +0100
Subject: [PATCH 3/3] Rename subtarget feature
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 4 ++--
clang/test/CodeGenOpenCL/amdgpu-features.cl | 4 ++--
llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +++++-----
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++--
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 ++--
llvm/lib/TargetParser/TargetParser.cpp | 2 +-
9 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index b0f9cf2c551fe..cd7d19651307f 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -749,7 +749,7 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8bf16, "V8yV8y*3", "nc", "gfx
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "wakeup-barrier-inst")
+TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "s-wakeup-barrier-inst")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
index f6312be342ca9..e30f393a6257b 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
@@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; }
// CHECK-NEXT: ret void
//
//.
-// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wakeup-barrier-inst,+wavefrontsize32" "uniform-work-group-size"="false" }
-// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wakeup-barrier-inst,+wavefrontsize32" }
+// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+s-wakeup-barrier-inst,+wavefrontsize32" "uniform-work-group-size"="false" }
+// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+s-wakeup-barrier-inst,+wavefrontsize32" }
// CHECK: attributes #[[ATTR2]] = { convergent nounwind }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 96f4cdfc8d1ca..b849129b377c2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -109,8 +109,8 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wakeup-barrier-inst,+wavefrontsize32"
-// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wakeup-barrier-inst,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+s-wakeup-barrier-inst,+wavefrontsize32"
+// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+s-wakeup-barrier-inst,+wavefrontsize32"
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize64"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 673a88c79677e..e62db7c564ccd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1276,8 +1276,8 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
"Has s_setprio_inc_wg instruction."
>;
-def FeatureWakeupBarrier : SubtargetFeature<"wakeup-barrier-inst",
- "HasWakeupBarrier",
+def FeatureSWakeupBarrier : SubtargetFeature<"s-wakeup-barrier-inst",
+ "HasSWakeupBarrier",
"true",
"Has s_wakeup_barrier instruction."
>;
@@ -2206,7 +2206,7 @@ def FeatureISAVersion12_50_Common : FeatureSet<
FeaturePkAddMinMaxInsts,
FeatureLdsBarrierArriveAtomic,
FeatureSetPrioIncWgInst,
- FeatureWakeupBarrier,
+ FeatureSWakeupBarrier,
Feature45BitNumRecordsBufferResource,
FeatureSupportsXNACK,
FeatureXNACK,
@@ -3083,8 +3083,8 @@ def HasLdsBarrierArriveAtomic : Predicate<"Subtarget->hasLdsBarrierArriveAtomic(
def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
AssemblerPredicate<(all_of FeatureSetPrioIncWgInst)>;
-def HasWakeupBarrier : Predicate<"Subtarget->hasWakeupBarrier()">,
- AssemblerPredicate<(all_of FeatureWakeupBarrier)>;
+def HasSWakeupBarrier : Predicate<"Subtarget->hasSWakeupBarrier()">,
+ AssemblerPredicate<(all_of FeatureSWakeupBarrier)>;
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
AssemblerPredicate<(all_of FeatureRequiresAlignedVGPRs)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d7c81d22a2fb0..5dc7c8327102e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2393,7 +2393,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_s_barrier_signal_var:
return selectNamedBarrierInit(I, IntrinsicID);
case Intrinsic::amdgcn_s_wakeup_barrier: {
- if (!STI.hasWakeupBarrier()) {
+ if (!STI.hasSWakeupBarrier()) {
Function &F = I.getMF()->getFunction();
F.getContext().diagnose(
DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 6b54699b7009e..b1b0d69960ae4 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -289,7 +289,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasPointSampleAccel = false;
bool HasLdsBarrierArriveAtomic = false;
bool HasSetPrioIncWgInst = false;
- bool HasWakeupBarrier = false;
+ bool HasSWakeupBarrier = false;
bool RequiresCOV6 = false;
bool UseBlockVGPROpsForCSR = false;
@@ -1614,7 +1614,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
// \returns true if target has S_WAKEUP_BARRIER instruction.
- bool hasWakeupBarrier() const { return HasWakeupBarrier; }
+ bool hasSWakeupBarrier() const { return HasSWakeupBarrier; }
// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
// of sign-extending. Note that GFX1250 has not only fixed the bug but also
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b9a0e96ae12a8..90b7aa2644e9b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11528,7 +11528,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue(NewMI, 0);
}
case Intrinsic::amdgcn_s_wakeup_barrier: {
- if (!Subtarget->hasWakeupBarrier())
+ if (!Subtarget->hasSWakeupBarrier())
return SDValue();
[[fallthrough]];
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index a0bd298313b36..8dc1cdd9354ab 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -508,7 +508,7 @@ def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins),
"", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
- let SubtargetPredicate = HasWakeupBarrier;
+ let SubtargetPredicate = HasSWakeupBarrier;
}
} // End Uses = [M0]
@@ -537,7 +537,7 @@ def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs),
(ins SplitBarrier:$src0), "$src0", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
- let SubtargetPredicate = HasWakeupBarrier;
+ let SubtargetPredicate = HasSWakeupBarrier;
}
} // End has_sdst = 0
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 75fa121839a7a..b15736cce37a5 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -443,7 +443,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["atomic-global-pk-add-bf16-inst"] = true;
Features["atomic-ds-pk-add-16-insts"] = true;
Features["setprio-inc-wg-inst"] = true;
- Features["wakeup-barrier-inst"] = true;
+ Features["s-wakeup-barrier-inst"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
Features["atomic-fmin-fmax-global-f64"] = true;
Features["wavefrontsize32"] = true;
More information about the llvm-commits
mailing list