[llvm] 60dbde6 - [AMDGPU] report named barrier cnt part2 (#154588)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 12:00:48 PDT 2025
Author: Gang Chen
Date: 2025-08-20T12:00:45-07:00
New Revision: 60dbde69cd219e55bfab5581fca56e9106fe108f
URL: https://github.com/llvm/llvm-project/commit/60dbde69cd219e55bfab5581fca56e9106fe108f
DIFF: https://github.com/llvm/llvm-project/commit/60dbde69cd219e55bfab5581fca56e9106fe108f.diff
LOG: [AMDGPU] report named barrier cnt part2 (#154588)
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index ef2a98f09967c..f7a847ec7f38f 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -18207,6 +18207,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
(wavefrontsize64)
``.amdhsa_uses_dynamic_stack`` 0 GFX6-GFX12 Controls USES_DYNAMIC_STACK in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+ ``.amdhsa_named_barrier_count`` 0 GFX1250+ Controls NAMED_BAR_CNT in
+ :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx12-table`.
``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0 GFX6-GFX10 Controls ENABLE_PRIVATE_SEGMENT in
(except :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx12-table`.
GFX942)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 845449931b5a9..36c0d1cbcea22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -809,15 +809,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
" AccumOffset: " + getMCExprStr(AdjustedAccum), false);
}
- if (AMDGPU::isGFX1250(STM)) {
- const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx);
- const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo(
- CurrentProgramInfo.NamedBarCnt, BarBlkConst, Ctx);
- const MCExpr *BarBlks =
- MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx);
- OutStreamer->emitRawComment(" NamedBarCnt: " + getMCExprStr(BarBlks),
- false);
- }
+ if (AMDGPU::isGFX1250(STM))
+ OutStreamer->emitRawComment(
+ " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
+ false);
OutStreamer->emitRawComment(
" Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false);
@@ -1023,7 +1018,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.DynamicCallStack =
MCBinaryExpr::createOr(GetSymRefExpr(RIK::RIK_HasDynSizedStack),
GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
- ProgInfo.NamedBarCnt = GetSymRefExpr(RIK::RIK_NumNamedBarrier);
+
+ const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx);
+ const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo(
+ GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);
+ ProgInfo.NamedBarCnt = MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx);
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 951473264d089..78a2678808eee 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5986,6 +5986,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
SMRange VGPRRange;
const MCExpr *NextFreeVGPR = ZeroExpr;
const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
+ const MCExpr *NamedBarCnt = ZeroExpr;
uint64_t SharedVGPRCount = 0;
uint64_t PreloadLength = 0;
uint64_t PreloadOffset = 0;
@@ -6208,6 +6209,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
AccumOffset = ExprVal;
+ } else if (ID == ".amdhsa_named_barrier_count") {
+ if (!isGFX1250())
+ return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
+ NamedBarCnt = ExprVal;
} else if (ID == ".amdhsa_reserve_vcc") {
if (EvaluatableExpr && !isUInt<1>(Val))
return OutOfRangeError(ValRange);
@@ -6448,6 +6453,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
getContext());
}
+ if (isGFX1250())
+ MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
+ COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
+ COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
+ getContext());
+
if (IVersion.Major >= 10 && IVersion.Major < 12) {
// SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 197de1228a29e..b58ba947c72e2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -507,6 +507,12 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
OS << '\n';
}
+ if (AMDGPU::isGFX1250(STI))
+ PrintField(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
+ ".amdhsa_named_barrier_count");
+
OS << "\t\t.amdhsa_reserve_vcc ";
EmitMCExpr(ReserveVCC);
OS << '\n';
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
index 5295a13461f69..0804a52ba536d 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
@@ -26,6 +26,7 @@ define void @func2() {
ret void
}
+; SOUT: .amdhsa_named_barrier_count 1
; SOUT: .set kernel1.num_named_barrier, max(2, func1.num_named_barrier, func2.num_named_barrier)
define amdgpu_kernel void @kernel1() #0 {
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1.kernel1, i32 11)
@@ -39,6 +40,7 @@ define amdgpu_kernel void @kernel1() #0 {
ret void
}
+; SOUT: .amdhsa_named_barrier_count 1
; SOUT: .set kernel2.num_named_barrier, max(2, func2.num_named_barrier)
define amdgpu_kernel void @kernel2() #0 {
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
More information about the llvm-commits mailing list