[llvm] f90f488 - AMDGPU: Serialize gds size in MIR
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 19 19:13:04 PDT 2022
Author: Matt Arsenault
Date: 2022-04-19T22:12:59-04:00
New Revision: f90f4884c88659881b1b37be473bd95180e70bf4
URL: https://github.com/llvm/llvm-project/commit/f90f4884c88659881b1b37be473bd95180e70bf4
DIFF: https://github.com/llvm/llvm-project/commit/f90f4884c88659881b1b37be473bd95180e70bf4.diff
LOG: AMDGPU: Serialize gds size in MIR
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 48cf46b5f8715..ca00b5d46b976 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -65,7 +65,7 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); }
- unsigned getLDSSize() const {
+ uint32_t getLDSSize() const {
return LDSSize;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c07511b929890..1c6038f217f92 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -578,6 +578,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
const llvm::MachineFunction &MF)
: ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
+ GDSSize(MFI.getGDSSize()),
DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
@@ -607,6 +608,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
LDSSize = YamlMFI.LDSSize;
+ GDSSize = YamlMFI.GDSSize;
DynLDSAlign = YamlMFI.DynLDSAlign;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
Occupancy = YamlMFI.Occupancy;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9056865094d16..0293880b31c33 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -270,7 +270,8 @@ template <> struct MappingTraits<SIMode> {
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint64_t ExplicitKernArgSize = 0;
unsigned MaxKernArgAlign = 0;
- unsigned LDSSize = 0;
+ uint32_t LDSSize = 0;
+ uint32_t GDSSize = 0;
Align DynLDSAlign;
bool IsEntryFunction = false;
bool NoSignedZerosFPMath = false;
@@ -308,6 +309,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
UINT64_C(0));
YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+ YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index c9170cdba792b..6873683ece6a0 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -7,6 +7,7 @@
; AFTER-PEI-NEXT: explicitKernArgSize: 12
; AFTER-PEI-NEXT: maxKernArgAlign: 8
; AFTER-PEI-NEXT: ldsSize: 0
+; AFTER-PEI-NEXT: gdsSize: 0
; AFTER-PEI-NEXT: dynLDSAlign: 1
; AFTER-PEI-NEXT: isEntryFunction: true
; AFTER-PEI-NEXT: noSignedZerosFPMath: false
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 515a1fcf5b534..c0a18bb76b80b 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -8,6 +8,7 @@
# FULL-NEXT: explicitKernArgSize: 128
# FULL-NEXT: maxKernArgAlign: 64
# FULL-NEXT: ldsSize: 2048
+# FULL-NEXT: gdsSize: 256
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
@@ -47,6 +48,7 @@
# SIMPLE-NEXT: explicitKernArgSize: 128
# SIMPLE-NEXT: maxKernArgAlign: 64
# SIMPLE-NEXT: ldsSize: 2048
+# SIMPLE-NEXT: gdsSize: 256
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: memoryBound: true
# SIMPLE-NEXT: waveLimiter: true
@@ -74,6 +76,7 @@ machineFunctionInfo:
explicitKernArgSize: 128
maxKernArgAlign: 64
ldsSize: 2048
+ gdsSize: 256
isEntryFunction: true
noSignedZerosFPMath: false
memoryBound: true
@@ -100,6 +103,7 @@ body: |
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
+# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
@@ -163,6 +167,7 @@ body: |
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
+# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
@@ -227,6 +232,7 @@ body: |
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
+# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
@@ -387,6 +393,7 @@ body: |
# ALL-LABEL: name: dyn_lds_with_alignment
# FULL: ldsSize: 0
+# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 8
# SIMPLE: dynLDSAlign: 8
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index a3a6f0898101c..da2836b42483f 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -11,6 +11,7 @@
; CHECK-NEXT: explicitKernArgSize: 128
; CHECK-NEXT: maxKernArgAlign: 64
; CHECK-NEXT: ldsSize: 2048
+; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: noSignedZerosFPMath: false
@@ -43,11 +44,14 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
ret void
}
+ at gds = addrspace(2) global [128 x i32] undef, align 4
+
; CHECK-LABEL: {{^}}name: ps_shader
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 4
; CHECK-NEXT: ldsSize: 0
+; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: noSignedZerosFPMath: false
@@ -75,11 +79,18 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
ret void
}
+; CHECK-LABEL: {{^}}name: gds_size_shader
+; CHECK: gdsSize: 4096
+define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
+ ret void
+}
+
; CHECK-LABEL: {{^}}name: function
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
+; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
@@ -121,6 +132,7 @@ define void @function() {
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
+; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: noSignedZerosFPMath: true
@@ -214,11 +226,12 @@ define amdgpu_cs void @wwm_reserved_regs(i32 addrspace(1)* %ptr, <4 x i32> inreg
ret void
}
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #5
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #6
attributes #0 = { "no-signed-zeros-fp-math" = "true" }
attributes #1 = { "amdgpu-dx10-clamp" = "false" }
attributes #2 = { "amdgpu-ieee" = "false" }
attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" }
attributes #4 = { "amdgpu-32bit-address-high-bits"="0xffff8000" }
-attributes #5 = { convergent nounwind readnone willreturn }
+attributes #5 = { "amdgpu-gds-size"="4096" }
+attributes #6 = { convergent nounwind readnone willreturn }
More information about the llvm-commits
mailing list