[llvm] 60dbde6 - [AMDGPU] report named barrier cnt part2 (#154588)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 20 12:00:48 PDT 2025


Author: Gang Chen
Date: 2025-08-20T12:00:45-07:00
New Revision: 60dbde69cd219e55bfab5581fca56e9106fe108f

URL: https://github.com/llvm/llvm-project/commit/60dbde69cd219e55bfab5581fca56e9106fe108f
DIFF: https://github.com/llvm/llvm-project/commit/60dbde69cd219e55bfab5581fca56e9106fe108f.diff

LOG: [AMDGPU] report named barrier cnt part2 (#154588)

Added: 
    

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index ef2a98f09967c..f7a847ec7f38f 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -18207,6 +18207,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
                                                               (wavefrontsize64)
      ``.amdhsa_uses_dynamic_stack``                           0                   GFX6-GFX12   Controls USES_DYNAMIC_STACK in
                                                                                                :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+     ``.amdhsa_named_barrier_count``                          0                   GFX1250+     Controls NAMED_BAR_CNT in
+                                                                                               :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx12-table`.
      ``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0                   GFX6-GFX10   Controls ENABLE_PRIVATE_SEGMENT in
                                                                                   (except      :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx12-table`.
                                                                                   GFX942)

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 845449931b5a9..36c0d1cbcea22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -809,15 +809,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
           " AccumOffset: " + getMCExprStr(AdjustedAccum), false);
     }
 
-    if (AMDGPU::isGFX1250(STM)) {
-      const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx);
-      const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo(
-          CurrentProgramInfo.NamedBarCnt, BarBlkConst, Ctx);
-      const MCExpr *BarBlks =
-          MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx);
-      OutStreamer->emitRawComment(" NamedBarCnt: " + getMCExprStr(BarBlks),
-                                  false);
-    }
+    if (AMDGPU::isGFX1250(STM))
+      OutStreamer->emitRawComment(
+          " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
+          false);
 
     OutStreamer->emitRawComment(
         " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false);
@@ -1023,7 +1018,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.DynamicCallStack =
       MCBinaryExpr::createOr(GetSymRefExpr(RIK::RIK_HasDynSizedStack),
                              GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
-  ProgInfo.NamedBarCnt = GetSymRefExpr(RIK::RIK_NumNamedBarrier);
+
+  const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx);
+  const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo(
+      GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);
+  ProgInfo.NamedBarCnt = MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx);
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 

diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 951473264d089..78a2678808eee 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5986,6 +5986,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   SMRange VGPRRange;
   const MCExpr *NextFreeVGPR = ZeroExpr;
   const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
+  const MCExpr *NamedBarCnt = ZeroExpr;
   uint64_t SharedVGPRCount = 0;
   uint64_t PreloadLength = 0;
   uint64_t PreloadOffset = 0;
@@ -6208,6 +6209,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       if (!isGFX90A())
         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
       AccumOffset = ExprVal;
+    } else if (ID == ".amdhsa_named_barrier_count") {
+      if (!isGFX1250())
+        return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
+      NamedBarCnt = ExprVal;
     } else if (ID == ".amdhsa_reserve_vcc") {
       if (EvaluatableExpr && !isUInt<1>(Val))
         return OutOfRangeError(ValRange);
@@ -6448,6 +6453,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                                  getContext());
   }
 
+  if (isGFX1250())
+    MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
+                                 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
+                                 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
+                                 getContext());
+
   if (IVersion.Major >= 10 && IVersion.Major < 12) {
     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 197de1228a29e..b58ba947c72e2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -507,6 +507,12 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     OS << '\n';
   }
 
+  if (AMDGPU::isGFX1250(STI))
+    PrintField(KD.compute_pgm_rsrc3,
+               amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
+               amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
+               ".amdhsa_named_barrier_count");
+
   OS << "\t\t.amdhsa_reserve_vcc ";
   EmitMCExpr(ReserveVCC);
   OS << '\n';

diff  --git a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
index 5295a13461f69..0804a52ba536d 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
@@ -26,6 +26,7 @@ define void @func2() {
     ret void
 }
 
+; SOUT:                .amdhsa_named_barrier_count 1
 ; SOUT:        .set kernel1.num_named_barrier, max(2, func1.num_named_barrier, func2.num_named_barrier)
 define amdgpu_kernel void @kernel1() #0 {
 ; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1.kernel1, i32 11)
@@ -39,6 +40,7 @@ define amdgpu_kernel void @kernel1() #0 {
     ret void
 }
 
+; SOUT:                .amdhsa_named_barrier_count 1
 ; SOUT:        .set kernel2.num_named_barrier, max(2, func2.num_named_barrier)
 define amdgpu_kernel void @kernel2() #0 {
 ; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)


        


More information about the llvm-commits mailing list