[llvm] [AMDGPU] Add dynamic LDS size implicit kernel argument to CO-v5 (PR #65273)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 22 23:44:04 PDT 2023
https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/65273
>From a443ef399c2af0a22f9978acdd121da14406a792 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 4 Sep 2023 20:16:51 +0530
Subject: [PATCH] [AMDGPU] Add dynamic LDS size implicit kernel argument to
CO-v5
The 4-byte hidden_dynamic_lds_size argument is added in the reserved
section at offset 120 of the implicit argument layout.
---
llvm/docs/AMDGPUUsage.rst | 3 +++
.../lib/BinaryFormat/AMDGPUMetadataVerifier.cpp | 1 +
.../Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 11 ++++++++++-
.../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 17 +++++++++++++++++
.../lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h | 3 +++
.../AMDGPU/hsa-metadata-hidden-args-v5.ll | 7 ++++++-
6 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 9427df94e128e28..878e3663e1c5b96 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4024,6 +4024,9 @@ Code object V5 metadata is the same as
buffer that conforms to the requirements of the malloc/free
device library V1 version implementation.
+ "hidden_dynamic_lds_size"
+ Size of the dynamically allocated LDS memory is passed in the kernarg.
+
"hidden_private_base"
The high 32 bits of the flat addressing private aperture base.
Only used by GFX8 to allow conversion between private segment
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 35a79ec04b6e767..f94940eecae20d9 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -135,6 +135,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
.Case("hidden_default_queue", true)
.Case("hidden_completion_action", true)
.Case("hidden_multigrid_sync_arg", true)
+ .Case("hidden_dynamic_lds_size", true)
.Case("hidden_private_base", true)
.Case("hidden_shared_base", true)
.Case("hidden_queue_ptr", true)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 5060cd3aec581ce..13e94f7efa959e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -18,6 +18,7 @@
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
+#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/IR/Module.h"
using namespace llvm;
@@ -663,7 +664,15 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
Offset += 8; // Skipped.
}
- Offset += 72; // Reserved.
+ // emit argument for hidden dynamic lds size
+ if (llvm::AMDGPU::usesDynamicLDS(&Func)) {
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
+ Args);
+ } else {
+ Offset += 4; // skipped
+ }
+
+ Offset += 68; // Reserved.
// hidden_private_base and hidden_shared_base are only when the subtarget has
// ApertureRegs.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index cbdbf1c16f9f0af..e14cfc13e5408f2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -143,6 +143,23 @@ bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
return false;
}
+bool usesDynamicLDS(const Function *F) {  // True if an instruction in F directly uses a dynamic LDS global.
+ const auto *M = F->getParent();  // Parent of F; its globals are scanned below.
+ for (auto &GV : M->globals()) {
+ bool isDynLds = llvm::AMDGPU::isDynamicLDS(GV);
+ if (!isDynLds)
+ continue;  // Only globals accepted by isDynamicLDS() are of interest.
+ for (auto *U : GV.users()) {
+ auto *I = dyn_cast<Instruction>(U);
+ if (!I)
+ continue;  // NOTE(review): non-Instruction users (e.g. constant expressions) are skipped, so uses reached only through them are not detected -- confirm this is intended.
+ if (I->getParent()->getParent() == F)  // The using instruction belongs to F.
+ return true;
+ }
+ }
+ return false;  // No direct instruction use of a dynamic LDS global was found in F.
+}
+
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index df37c420fa720f0..94674304564afd9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -40,6 +40,9 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA);
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
AAResults *AA);
+/// Returns true if an instruction in \p F directly uses a dynamic LDS global.
+bool usesDynamicLDS(const Function *F);
+
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
index cb30d668674c316..1a2ce636c733c53 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
@@ -81,13 +81,16 @@
; CHECK-NEXT: - .offset: 136
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_completion_action
+; CHECK: - .offset: 144
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_kind: hidden_dynamic_lds_size
; GFX8-NEXT: - .offset: 216
; GFX8-NEXT: .size: 4
; GFX8-NEXT: .value_kind: hidden_private_base
; GFX8-NEXT: - .offset: 220
; GFX8-NEXT: .size: 4
; GFX8-NEXT: .value_kind: hidden_shared_base
-; CHECK: - .offset: 224
+; CHECK: - .offset: 224
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_queue_ptr
@@ -97,6 +100,7 @@
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
; CHECK-NEXT: - 2
+ at lds = external hidden addrspace(3) global [0 x i32], align 4
define amdgpu_kernel void @test_v5(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
@@ -106,6 +110,7 @@ entry:
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
+ store i32 1234, ptr addrspacecast (ptr addrspace(3) @lds to ptr), align 4
ret void
}
More information about the llvm-commits
mailing list