[llvm] [AMDGPU] Add dynamic LDS size implicit kernel argument to CO-v5 (PR #65273)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 19 05:18:50 PDT 2023


https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/65273

>From b427eb273d45ce2d6c52c3bb1dd6b7ff691e5175 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 4 Sep 2023 20:16:51 +0530
Subject: [PATCH] [AMDGPU] Add dynamic LDS size implicit kernel argument to
 CO-v5

The hidden_dynamic_lds_size argument is added in the reserved
section at offset 120 of the implicit argument layout.
---
 llvm/docs/AMDGPUUsage.rst                     |  3 +++
 .../BinaryFormat/AMDGPUMetadataVerifier.cpp   |  1 +
 .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp      | 11 +++++++++-
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 20 +++++++++++++++++++
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.h   |  3 +++
 .../AMDGPU/hsa-metadata-hidden-args-v5.ll     |  7 ++++++-
 6 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 8022816d7e616d3..a48f25d6c8070b1 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4024,6 +4024,9 @@ Code object V5 metadata is the same as
                                                        buffer that conforms to the requirements of the malloc/free
                                                        device library V1 version implementation.
 
+                                                     "hidden_dynamic_lds_size"
+                                                       Size of the dynamically allocated LDS memory is passed in the kernarg.
+
                                                      "hidden_private_base"
                                                        The high 32 bits of the flat addressing private aperture base.
                                                        Only used by GFX8 to allow conversion between private segment
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 35a79ec04b6e767..f94940eecae20d9 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -135,6 +135,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
                                .Case("hidden_default_queue", true)
                                .Case("hidden_completion_action", true)
                                .Case("hidden_multigrid_sync_arg", true)
+                               .Case("hidden_dynamic_lds_size", true)
                                .Case("hidden_private_base", true)
                                .Case("hidden_shared_base", true)
                                .Case("hidden_queue_ptr", true)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 5060cd3aec581ce..1a1dad5cc953aa4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -18,6 +18,7 @@
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIProgramInfo.h"
+#include "Utils/AMDGPUMemoryUtils.h"
 #include "llvm/IR/Module.h"
 using namespace llvm;
 
@@ -663,7 +664,15 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
     Offset += 8; // Skipped.
   }
 
-  Offset += 72; // Reserved.
+  // Emit the argument for the hidden dynamic LDS size.
+  if (llvm::AMDGPU::usesDynamicLDS(Func)) {
+    emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
+                  Args);
+  } else {
+    Offset += 4; // skipped
+  }
+
+  Offset += 68; // Reserved.
 
   // hidden_private_base and hidden_shared_base are only when the subtarget has
   // ApertureRegs.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index cbdbf1c16f9f0af..db81dd17226ddbd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -143,6 +143,26 @@ bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
   return false;
 }
 
+bool usesDynamicLDS(const Function &F) { // Scan each instruction of F.
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      if (auto *Load = dyn_cast<LoadInst>(&I)) { // Load address operand.
+        auto *Val = Load->getPointerOperand()->stripPointerCasts();
+        if (auto *GV = dyn_cast<GlobalVariable>(Val))
+          if (llvm::AMDGPU::isDynamicLDS(*GV))
+            return true; // Load reads from a dynamic LDS global.
+      }
+      if (auto *Store = dyn_cast<StoreInst>(&I)) { // Store address operand.
+        auto *Val = Store->getPointerOperand()->stripPointerCasts();
+        if (auto *GV = dyn_cast<GlobalVariable>(Val))
+          if (llvm::AMDGPU::isDynamicLDS(*GV))
+            return true; // Store writes to a dynamic LDS global.
+      }
+    }
+  }
+  return false; // Only loads/stores are inspected; other users are ignored.
+}
+
 } // end namespace AMDGPU
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index df37c420fa720f0..5788698ba9284af 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -40,6 +40,9 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA);
 bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                            AAResults *AA);
 
+/// Returns true if \p F has a load or store addressing a dynamic LDS global.
+bool usesDynamicLDS(const Function &F);
+
 } // end namespace AMDGPU
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
index cb30d668674c316..1a2ce636c733c53 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
@@ -81,13 +81,16 @@
 ; CHECK-NEXT:      - .offset:         136
 ; CHECK-NEXT:        .size:           8
 ; CHECK-NEXT:        .value_kind:     hidden_completion_action
+; CHECK:          - .offset:          144
+; CHECK-NEXT:        .size:           4
+; CHECK-NEXT:        .value_kind:     hidden_dynamic_lds_size
 ; GFX8-NEXT:      - .offset:         216
 ; GFX8-NEXT:        .size:           4
 ; GFX8-NEXT:        .value_kind:     hidden_private_base
 ; GFX8-NEXT:      - .offset:         220
 ; GFX8-NEXT:        .size:           4
 ; GFX8-NEXT:        .value_kind:     hidden_shared_base
-; CHECK:      - .offset:         224
+; CHECK:          - .offset:          224
 ; CHECK-NEXT:        .size:           8
 ; CHECK-NEXT:        .value_kind:     hidden_queue_ptr
 
@@ -97,6 +100,7 @@
 ; CHECK:  amdhsa.version:
 ; CHECK-NEXT: - 1
 ; CHECK-NEXT: - 2
+@lds = external hidden addrspace(3) global [0 x i32], align 4
 define amdgpu_kernel void @test_v5(
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
@@ -106,6 +110,7 @@ entry:
   %b.val = load half, ptr addrspace(1) %b
   %r.val = fadd half %a.val, %b.val
   store half %r.val, ptr addrspace(1) %r
+  store i32 1234, ptr addrspacecast (ptr addrspace(3) @lds to ptr), align 4
   ret void
 }
 



More information about the llvm-commits mailing list