[llvm] 575fad2 - [AMDGPU] Upstream the Support for array of named barriers (#154604)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 14:53:06 PDT 2025
Author: Gang Chen
Date: 2025-08-20T14:53:03-07:00
New Revision: 575fad289273d3b3b77acc0cf3b54192b6160943
URL: https://github.com/llvm/llvm-project/commit/575fad289273d3b3b77acc0cf3b54192b6160943
DIFF: https://github.com/llvm/llvm-project/commit/575fad289273d3b3b77acc0cf3b54192b6160943.diff
LOG: [AMDGPU] Upstream the Support for array of named barriers (#154604)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 2d8f259007c66..93f9c7d7fb176 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -955,6 +955,7 @@ class AMDGPULowerModuleLDS {
Module &M, LDSUsesInfoTy &LDSUsesInfo,
VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly) {
bool Changed = false;
+ const DataLayout &DL = M.getDataLayout();
// The 1st round: give module-absolute assignments
int NumAbsolutes = 0;
std::vector<GlobalVariable *> OrderedGVs;
@@ -976,8 +977,11 @@ class AMDGPULowerModuleLDS {
}
OrderedGVs = sortByName(std::move(OrderedGVs));
for (GlobalVariable *GV : OrderedGVs) {
- int BarId = ++NumAbsolutes;
unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
+ unsigned BarId = NumAbsolutes + 1;
+ unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
+ NumAbsolutes += BarCnt;
+
// 4 bits for alignment, 5 bits for the barrier num,
// 3 bits for the barrier scope
unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
@@ -1015,12 +1019,11 @@ class AMDGPULowerModuleLDS {
// create a new GV used only by this kernel and its function.
auto NewGV = uniquifyGVPerKernel(M, GV, F);
Changed |= (NewGV != GV);
- int BarId = (NumAbsolutes + 1);
- if (Kernel2BarId.contains(F)) {
- BarId = (Kernel2BarId[F] + 1);
- }
- Kernel2BarId[F] = BarId;
unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
+ unsigned BarId = Kernel2BarId[F];
+ BarId += NumAbsolutes + 1;
+ unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
+ Kernel2BarId[F] += BarCnt;
unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
recordLDSAbsoluteAddress(&M, NewGV, Offset);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
index dfe7c53aaca06..5776d14a3020a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
@@ -31,28 +31,40 @@ Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
GV->getValueType());
}
-TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
- // TODO: Allow arrays and structs, if all members are barriers
- // in the same scope.
- // TODO: Disallow other uses of target("amdgcn.named.barrier") including:
- // - Structs containing barriers in different scope.
- // - Structs containing a mixture of barriers and other data.
- // - Globals in other address spaces.
- // - Allocas.
+// Returns the target extension type of a global variable,
+// which can only be a TargetExtType, an array or single-element struct of it,
+// or their nesting combination.
+// TODO: allow struct of multiple TargetExtType elements of the same type.
+// TODO: Disallow other uses of target("amdgcn.named.barrier") including:
+// - Structs containing barriers in different scope/rank
+// - Structs containing a mixture of barriers and other data.
+// - Globals in other address spaces.
+// - Allocas.
+static TargetExtType *getTargetExtType(const GlobalVariable &GV) {
Type *Ty = GV.getValueType();
while (true) {
if (auto *TTy = dyn_cast<TargetExtType>(Ty))
- return TTy->getName() == "amdgcn.named.barrier" ? TTy : nullptr;
+ return TTy;
if (auto *STy = dyn_cast<StructType>(Ty)) {
- if (STy->getNumElements() == 0)
+ if (STy->getNumElements() != 1)
return nullptr;
Ty = STy->getElementType(0);
continue;
}
+ if (auto *ATy = dyn_cast<ArrayType>(Ty)) {
+ Ty = ATy->getElementType();
+ continue;
+ }
return nullptr;
}
}
+TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
+ if (TargetExtType *Ty = getTargetExtType(GV))
+ return Ty->getName() == "amdgcn.named.barrier" ? Ty : nullptr;
+ return nullptr;
+}
+
bool isDynamicLDS(const GlobalVariable &GV) {
// external zero size addrspace(3) without initializer is dynlds.
const Module *M = GV.getParent();
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
index 0804a52ba536d..03a666fbe3aea 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
@@ -1,16 +1,18 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=SOUT %s
-@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
+%class.ExpAmdWorkgroupWaveBarrier = type { target("amdgcn.named.barrier", 0) }
+
+@bar2 = internal addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] poison
@bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
-@bar1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
+@bar1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison
-; CHECK: @bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !0
+; CHECK: @bar2 = internal addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] poison, !absolute_symbol !0
; CHECK-NEXT: @bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !1
-; CHECK-NEXT: @bar1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !2
-; CHECK-NEXT: @bar1.kernel1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !2
+; CHECK-NEXT: @bar1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison, !absolute_symbol !2
+; CHECK-NEXT: @bar1.kernel1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison, !absolute_symbol !2
-; SOUT: .set func1.num_named_barrier, 3
+; SOUT: .set func1.num_named_barrier, 7
define void @func1() {
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
@@ -18,7 +20,7 @@ define void @func1() {
ret void
}
-; SOUT: .set func2.num_named_barrier, 1
+; SOUT: .set func2.num_named_barrier, 2
define void @func2() {
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
@@ -26,8 +28,8 @@ define void @func2() {
ret void
}
-; SOUT: .amdhsa_named_barrier_count 1
-; SOUT: .set kernel1.num_named_barrier, max(2, func1.num_named_barrier, func2.num_named_barrier)
+; SOUT: .amdhsa_named_barrier_count 2
+; SOUT: .set kernel1.num_named_barrier, max(6, func1.num_named_barrier, func2.num_named_barrier)
define amdgpu_kernel void @kernel1() #0 {
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1.kernel1, i32 11)
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 11)
@@ -40,8 +42,8 @@ define amdgpu_kernel void @kernel1() #0 {
ret void
}
-; SOUT: .amdhsa_named_barrier_count 1
-; SOUT: .set kernel2.num_named_barrier, max(2, func2.num_named_barrier)
+; SOUT: .amdhsa_named_barrier_count 2
+; SOUT: .set kernel2.num_named_barrier, max(6, func2.num_named_barrier)
define amdgpu_kernel void @kernel2() #0 {
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
@@ -68,5 +70,5 @@ attributes #1 = { convergent nounwind }
attributes #2 = { nounwind readnone }
; CHECK: !0 = !{i32 8396816, i32 8396817}
-; CHECK-NEXT: !1 = !{i32 8396848, i32 8396849}
-; CHECK-NEXT: !2 = !{i32 8396832, i32 8396833}
+; CHECK-NEXT: !1 = !{i32 8396912, i32 8396913}
+; CHECK-NEXT: !2 = !{i32 8396848, i32 8396849}
More information about the llvm-commits
mailing list