[llvm-branch-commits] [llvm] 20566a2 - AMDGPU: Add occupancy to serialized MachineFunctionInfo
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 21 06:25:17 PST 2021
Author: Matt Arsenault
Date: 2021-01-21T09:21:00-05:00
New Revision: 20566a2ed825c05d56708552d33d95ee12255f46
URL: https://github.com/llvm/llvm-project/commit/20566a2ed825c05d56708552d33d95ee12255f46
DIFF: https://github.com/llvm/llvm-project/commit/20566a2ed825c05d56708552d33d95ee12255f46.diff
LOG: AMDGPU: Add occupancy to serialized MachineFunctionInfo
Not sure about the default value handling, but also not sure about
defaulting to a theoretically subtarget-dependent value.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c756a134bdad..58c436836d19 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1225,6 +1225,12 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->initializeBaseYamlFields(YamlMFI);
+ if (MFI->Occupancy == 0) {
+ // Fixup the subtarget dependent default value.
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
+ }
+
auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
Register TempReg;
if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 8b4e1ba93288..9a0cdc7b1f4d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -538,6 +538,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
+ Occupancy(MFI.getOccupancy()),
ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
@@ -555,6 +556,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
LDSSize = YamlMFI.LDSSize;
DynLDSAlign = YamlMFI.DynLDSAlign;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
+ Occupancy = YamlMFI.Occupancy;
IsEntryFunction = YamlMFI.IsEntryFunction;
NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
MemoryBound = YamlMFI.MemoryBound;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index a6195adace22..11daf63e0305 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -275,6 +275,9 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
bool HasSpilledVGPRs = false;
uint32_t HighBitsOf32BitAddress = 0;
+ // TODO: 10 may be a better default since it's the maximum.
+ unsigned Occupancy = 0;
+
StringValue ScratchRSrcReg = "$private_rsrc_reg";
StringValue FrameOffsetReg = "$fp_reg";
StringValue StackPtrOffsetReg = "$sp_reg";
@@ -313,6 +316,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
YamlIO.mapOptional("highBitsOf32BitAddress",
MFI.HighBitsOf32BitAddress, 0u);
+ YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
}
};
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index e395e43667ac..3cbfd21dedf8 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -32,6 +32,7 @@
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
+# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -50,6 +51,7 @@
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
+# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
@@ -102,12 +104,14 @@ body: |
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
+# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: no_mfi
@@ -143,12 +147,14 @@ body: |
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
+# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi
@@ -185,6 +191,7 @@ body: |
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
+# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -192,6 +199,7 @@ body: |
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi_entry_func
@@ -306,3 +314,29 @@ body: |
S_ENDPGM 0
...
+
+---
+# ALL-LABEL: name: occupancy_0
+# ALL: occupancy: 10
+name: occupancy_0
+machineFunctionInfo:
+ occupancy: 0
+
+body: |
+ bb.0:
+ S_ENDPGM 0
+
+...
+
+---
+# ALL-LABEL: name: occupancy_3
+# ALL: occupancy: 3
+name: occupancy_3
+machineFunctionInfo:
+ occupancy: 3
+
+body: |
+ bb.0:
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index e3e78ddcb71f..a9736c471bf9 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -35,6 +35,7 @@
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
+; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
%gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0
@@ -68,6 +69,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
+; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: body:
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
ret void
@@ -98,6 +100,7 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
+; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: body:
define void @function() {
ret void
@@ -128,6 +131,7 @@ define void @function() {
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
+; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: body:
define void @function_nsz() #0 {
ret void
More information about the llvm-branch-commits
mailing list