[llvm] 6043d4d - [amdgpu] Accept an optional max to amdgpu-lds-size attribute for use in PromoteAlloca
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 15 13:38:51 PDT 2023
Author: Jon Chesterfield
Date: 2023-07-15T21:37:21+01:00
New Revision: 6043d4dfec1e47b6e314f3643f576ab808f246dc
URL: https://github.com/llvm/llvm-project/commit/6043d4dfec1e47b6e314f3643f576ab808f246dc
DIFF: https://github.com/llvm/llvm-project/commit/6043d4dfec1e47b6e314f3643f576ab808f246dc.diff
LOG: [amdgpu] Accept an optional max to amdgpu-lds-size attribute for use in PromoteAlloca
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index dfe64fb471fdae..1e6f421ff96027 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1090,11 +1090,13 @@ The AMDGPU backend supports the following LLVM IR attributes.
kernel argument that holds the completion action pointer. If this
attribute is absent, then the amdgpu-no-implicitarg-ptr is also removed.
- "amdgpu-lds-size" The number of bytes that will be allocated in the Local Data Store at
- address zero. Variables are allocated within this frame using absolute
- symbol metadata, primarily by the AMDGPULowerModuleLDS pass. Internal
- detail of how LDS variables are lowered, language front ends should not
- set this.
+ "amdgpu-lds-size"="min[,max]" Min is the minimum number of bytes that will be allocated in the Local
+ Data Store at address zero. Variables are allocated within this frame
+ using absolute symbol metadata, primarily by the AMDGPULowerModuleLDS
+ pass. Optional max is the maximum number of bytes that will be allocated.
+ Note that min==max indicates that no further variables can be added to
+ the frame. This is an internal detail of how LDS variables are lowered;
+ language front ends should not set this attribute.
======================================= ==========================================================
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index e265de1c0b2952..0df07d342f94e1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -198,7 +198,9 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/OptimizedStructLayout.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1252,8 +1254,22 @@ class AMDGPULowerModuleLDS : public ModulePass {
recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);
}
- if (Offset != 0)
- Func.addFnAttr("amdgpu-lds-size", std::to_string(Offset));
+ if (Offset != 0) {
+ std::string Buffer;
+ raw_string_ostream SS{Buffer};
+ SS << format("%u", Offset);
+
+ // Instead of explicitly marking kernels that access dynamic variables
+ // using special case metadata, annotate with min-lds == max-lds, i.e.
+ // that there is no more space available for allocating more static
+ // LDS variables. That is the right condition to prevent allocating
+ // more variables which would collide with the addresses assigned to
+ // dynamic variables.
+ if (AllocateDynamicVariable)
+ SS << format(",%u", Offset);
+
+ Func.addFnAttr("amdgpu-lds-size", Buffer);
+ }
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 8f3bb62d6541f4..44bbfe6f13d9b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -10,6 +10,7 @@
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -43,10 +44,16 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
// Assume the attribute allocates before any known GDS globals.
StaticGDSSize = GDSSize;
+ // Second value, if present, is the maximum value that can be assigned.
+ // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
+ // during codegen.
+ std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
+ F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
+
// The two separate variables are only profitable when the LDS module lowering
// pass is disabled. If graphics does not use dynamic LDS, this is never
// profitable. Leaving cleanup for a later change.
- LDSSize = F.getFnAttributeAsParsedInteger("amdgpu-lds-size", 0);
+ LDSSize = LDSSizeRange.first;
StaticLDSSize = LDSSize;
CallingConv::ID CC = F.getCallingConv();
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
index 3f34cc3010da89..810e1edad10498 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
@@ -179,7 +179,7 @@ attributes #0 = { noinline }
; CHECK: declare i32 @llvm.amdgcn.lds.kernel.id() #3
; CHECK: attributes #0 = { noinline }
-; CHECK: attributes #1 = { "amdgpu-lds-size"="4" }
+; CHECK: attributes #1 = { "amdgpu-lds-size"="4,4" }
; CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
More information about the llvm-commits
mailing list