[llvm] 575fad2 - [AMDGPU] Upstream the Support for array of named barriers (#154604)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 20 14:53:06 PDT 2025


Author: Gang Chen
Date: 2025-08-20T14:53:03-07:00
New Revision: 575fad289273d3b3b77acc0cf3b54192b6160943

URL: https://github.com/llvm/llvm-project/commit/575fad289273d3b3b77acc0cf3b54192b6160943
DIFF: https://github.com/llvm/llvm-project/commit/575fad289273d3b3b77acc0cf3b54192b6160943.diff

LOG: [AMDGPU] Upstream the Support for array of named barriers (#154604)

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
    llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
    llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 2d8f259007c66..93f9c7d7fb176 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -955,6 +955,7 @@ class AMDGPULowerModuleLDS {
       Module &M, LDSUsesInfoTy &LDSUsesInfo,
       VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly) {
     bool Changed = false;
+    const DataLayout &DL = M.getDataLayout();
     // The 1st round: give module-absolute assignments
     int NumAbsolutes = 0;
     std::vector<GlobalVariable *> OrderedGVs;
@@ -976,8 +977,11 @@ class AMDGPULowerModuleLDS {
     }
     OrderedGVs = sortByName(std::move(OrderedGVs));
     for (GlobalVariable *GV : OrderedGVs) {
-      int BarId = ++NumAbsolutes;
       unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
+      unsigned BarId = NumAbsolutes + 1;
+      unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
+      NumAbsolutes += BarCnt;
+
       // 4 bits for alignment, 5 bits for the barrier num,
       // 3 bits for the barrier scope
       unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
@@ -1015,12 +1019,11 @@ class AMDGPULowerModuleLDS {
         // create a new GV used only by this kernel and its function.
         auto NewGV = uniquifyGVPerKernel(M, GV, F);
         Changed |= (NewGV != GV);
-        int BarId = (NumAbsolutes + 1);
-        if (Kernel2BarId.contains(F)) {
-          BarId = (Kernel2BarId[F] + 1);
-        }
-        Kernel2BarId[F] = BarId;
         unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
+        unsigned BarId = Kernel2BarId[F];
+        BarId += NumAbsolutes + 1;
+        unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
+        Kernel2BarId[F] += BarCnt;
         unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
         recordLDSAbsoluteAddress(&M, NewGV, Offset);
       }

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
index dfe7c53aaca06..5776d14a3020a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
@@ -31,28 +31,40 @@ Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
                                        GV->getValueType());
 }
 
-TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
-  // TODO: Allow arrays and structs, if all members are barriers
-  // in the same scope.
-  // TODO: Disallow other uses of target("amdgcn.named.barrier") including:
-  // - Structs containing barriers in different scope.
-  // - Structs containing a mixture of barriers and other data.
-  // - Globals in other address spaces.
-  // - Allocas.
+// Returns the target extension type of a global variable,
+// which can only be a TargetExtType, an array or single-element struct of it,
+// or their nesting combination.
+// TODO: allow struct of multiple TargetExtType elements of the same type.
+// TODO: Disallow other uses of target("amdgcn.named.barrier") including:
+// - Structs containing barriers in different scope/rank
+// - Structs containing a mixture of barriers and other data.
+// - Globals in other address spaces.
+// - Allocas.
+static TargetExtType *getTargetExtType(const GlobalVariable &GV) {
   Type *Ty = GV.getValueType();
   while (true) {
     if (auto *TTy = dyn_cast<TargetExtType>(Ty))
-      return TTy->getName() == "amdgcn.named.barrier" ? TTy : nullptr;
+      return TTy;
     if (auto *STy = dyn_cast<StructType>(Ty)) {
-      if (STy->getNumElements() == 0)
+      if (STy->getNumElements() != 1)
         return nullptr;
       Ty = STy->getElementType(0);
       continue;
     }
+    if (auto *ATy = dyn_cast<ArrayType>(Ty)) {
+      Ty = ATy->getElementType();
+      continue;
+    }
     return nullptr;
   }
 }
 
+TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
+  if (TargetExtType *Ty = getTargetExtType(GV))
+    return Ty->getName() == "amdgcn.named.barrier" ? Ty : nullptr;
+  return nullptr;
+}
+
 bool isDynamicLDS(const GlobalVariable &GV) {
   // external zero size addrspace(3) without initializer is dynlds.
   const Module *M = GV.getParent();

diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
index 0804a52ba536d..03a666fbe3aea 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
@@ -1,16 +1,18 @@
 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=SOUT %s
 
-@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
+%class.ExpAmdWorkgroupWaveBarrier = type { target("amdgcn.named.barrier", 0) }
+
+@bar2 = internal addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] poison
 @bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
-@bar1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
+@bar1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison
 
-; CHECK: @bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !0
+; CHECK: @bar2 = internal addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] poison, !absolute_symbol !0
 ; CHECK-NEXT: @bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !1
-; CHECK-NEXT: @bar1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !2
-; CHECK-NEXT: @bar1.kernel1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !2
+; CHECK-NEXT: @bar1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison, !absolute_symbol !2
+; CHECK-NEXT: @bar1.kernel1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison, !absolute_symbol !2
 
-; SOUT:        .set func1.num_named_barrier, 3
+; SOUT:        .set func1.num_named_barrier, 7
 define void @func1() {
     call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
     call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
@@ -18,7 +20,7 @@ define void @func1() {
     ret void
 }
 
-; SOUT:        .set func2.num_named_barrier, 1
+; SOUT:        .set func2.num_named_barrier, 2
 define void @func2() {
     call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
     call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
@@ -26,8 +28,8 @@ define void @func2() {
     ret void
 }
 
-; SOUT:                .amdhsa_named_barrier_count 1
-; SOUT:        .set kernel1.num_named_barrier, max(2, func1.num_named_barrier, func2.num_named_barrier)
+; SOUT:                .amdhsa_named_barrier_count 2
+; SOUT:        .set kernel1.num_named_barrier, max(6, func1.num_named_barrier, func2.num_named_barrier)
 define amdgpu_kernel void @kernel1() #0 {
 ; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1.kernel1, i32 11)
     call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 11)
@@ -40,8 +42,8 @@ define amdgpu_kernel void @kernel1() #0 {
     ret void
 }
 
-; SOUT:                .amdhsa_named_barrier_count 1
-; SOUT:        .set kernel2.num_named_barrier, max(2, func2.num_named_barrier)
+; SOUT:                .amdhsa_named_barrier_count 2
+; SOUT:        .set kernel2.num_named_barrier, max(6, func2.num_named_barrier)
 define amdgpu_kernel void @kernel2() #0 {
 ; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
     call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
@@ -68,5 +70,5 @@ attributes #1 = { convergent nounwind }
 attributes #2 = { nounwind readnone }
 
 ; CHECK: !0 = !{i32 8396816, i32 8396817}
-; CHECK-NEXT: !1 = !{i32 8396848, i32 8396849}
-; CHECK-NEXT: !2 = !{i32 8396832, i32 8396833}
+; CHECK-NEXT: !1 = !{i32 8396912, i32 8396913}
+; CHECK-NEXT: !2 = !{i32 8396848, i32 8396849}


        


More information about the llvm-commits mailing list