[llvm-branch-commits] [llvm] 5733167 - [AMDGPU] Mark amdgpu_gfx functions as module entry function
Sebastian Neubauer via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Dec 14 01:48:30 PST 2020
Author: Sebastian Neubauer
Date: 2020-12-14T10:43:39+01:00
New Revision: 5733167f54a582d52fc06617646c13cd1e0b3362
URL: https://github.com/llvm/llvm-project/commit/5733167f54a582d52fc06617646c13cd1e0b3362
DIFF: https://github.com/llvm/llvm-project/commit/5733167f54a582d52fc06617646c13cd1e0b3362.diff
LOG: [AMDGPU] Mark amdgpu_gfx functions as module entry function
- Allows lds allocations
- Writes resource usage into COMPUTE_PGM_RSRC1 registers in PAL metadata
Differential Revision: https://reviews.llvm.org/D92946
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 137f6896c87b..a14f846b76d1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -446,7 +446,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->SwitchSection(ConfigSection);
}
- if (MFI->isEntryFunction()) {
+ if (MFI->isModuleEntryFunction()) {
getSIProgramInfo(CurrentProgramInfo, MF);
} else {
auto I = CallGraphResourceInfo.insert(
@@ -459,7 +459,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (STM.isAmdPalOS()) {
if (MFI->isEntryFunction())
EmitPALMetadata(MF, CurrentProgramInfo);
- else
+ else if (MFI->isModuleEntryFunction())
emitPALFunctionMetadata(MF);
} else if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, CurrentProgramInfo);
@@ -922,7 +922,22 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
= TII->getNamedOperand(MI, AMDGPU::OpName::callee);
const Function *Callee = getCalleeFunction(*CalleeOp);
- if (!Callee || Callee->isDeclaration()) {
+ DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
+ CallGraphResourceInfo.end();
+ bool IsExternal = !Callee || Callee->isDeclaration();
+ if (!IsExternal)
+ I = CallGraphResourceInfo.find(Callee);
+
+ if (IsExternal || I == CallGraphResourceInfo.end()) {
+ // Avoid crashing on undefined behavior with an illegal call to a
+ // kernel. If a callsite's calling convention doesn't match the
+ // function's, it's undefined behavior. If the callsite calling
+ // convention does match, that would have errored earlier.
+ // FIXME: The verifier shouldn't allow this.
+ if (!IsExternal &&
+ AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
+ report_fatal_error("invalid call to entry function");
+
// If this is a call to an external function, we can't do much. Make
// conservative guesses.
@@ -943,19 +958,6 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// We force CodeGen to run in SCC order, so the callee's register
// usage etc. should be the cumulative usage of all callees.
- auto I = CallGraphResourceInfo.find(Callee);
- if (I == CallGraphResourceInfo.end()) {
- // Avoid crashing on undefined behavior with an illegal call to a
- // kernel. If a callsite's calling convention doesn't match the
- // function's, it's undefined behavior. If the callsite calling
- // convention does match, that would have errored earlier.
- // FIXME: The verifier shouldn't allow this.
- if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
- report_fatal_error("invalid call to entry function");
-
- llvm_unreachable("callee should have been handled before caller");
- }
-
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
@@ -1266,7 +1268,11 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
auto *MD = getTargetStreamer()->getPALMetadata();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- MD->setStackFrameSize(MF, MFI.getStackSize());
+ MD->setFunctionScratchSize(MF, MFI.getStackSize());
+ // Set compute registers
+ MD->setRsrc1(CallingConv::AMDGPU_CS,
+ CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
+ MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
}
// This is supposed to be log2(Size)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d0c348bc75d1..592446e00042 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1301,7 +1301,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
- if (!MFI->isEntryFunction()) {
+ if (!MFI->isModuleEntryFunction()) {
SDLoc DL(Op);
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 11476ee39764..9b39b86ae28f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2260,7 +2260,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- if (!MFI->isEntryFunction()) {
+ if (!MFI->isModuleEntryFunction()) {
const Function &Fn = MF.getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
Fn, "local memory global used by non-kernel function", MI.getDebugLoc(),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 14890fc43de7..5cda80255e1c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -13,11 +13,13 @@
using namespace llvm;
-AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
- MachineFunctionInfo(),
- Mode(MF.getFunction()),
- IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
- NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
+ : MachineFunctionInfo(), Mode(MF.getFunction()),
+ IsEntryFunction(
+ AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
+ IsModuleEntryFunction(
+ AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
+ NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 4a5c743e6301..b794ec1d4dae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -44,10 +44,13 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
// State of MODE register, assumed FP mode.
AMDGPU::SIModeRegisterDefaults Mode;
- // Kernels + shaders. i.e. functions called by the driver and not called
+ // Kernels + shaders. i.e. functions called by the hardware and not called
// by other functions.
bool IsEntryFunction = false;
+ // Entry points called by other functions instead of directly by the hardware.
+ bool IsModuleEntryFunction = false;
+
bool NoSignedZerosFPMath = false;
// Function may be memory bound.
@@ -77,6 +80,8 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
return IsEntryFunction;
}
+ bool isModuleEntryFunction() const { return IsModuleEntryFunction; }
+
bool hasNoSignedZerosFPMath() const {
return NoSignedZerosFPMath;
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 8193eedf391d..b5a0d43645d4 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1068,6 +1068,15 @@ bool isEntryFunctionCC(CallingConv::ID CC) {
}
}
+bool isModuleEntryFunctionCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_Gfx:
+ return true;
+ default:
+ return isEntryFunctionCC(CC);
+ }
+}
+
bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 7afc1bca670c..297ed3e22008 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -576,6 +576,15 @@ bool isCompute(CallingConv::ID CC);
LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);
+// These functions are considered entrypoints into the current module, i.e. they
+// are allowed to be called from outside the current module. This is different
+// from isEntryFunctionCC, which is only true for functions that are entered by
+// the hardware. Module entry points include all entry functions but also
+// include functions that can be called from other functions inside or outside
+// the current module. Module entry functions are allowed to allocate LDS.
+LLVM_READNONE
+bool isModuleEntryFunctionCC(CallingConv::ID CC);
+
// FIXME: Remove this when calling conventions cleaned up
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index efabab90422f..cea54d9fcfc3 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -238,12 +238,11 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
-// Set the scratch size in the metadata.
-void AMDGPUPALMetadata::setStackFrameSize(const MachineFunction &MF,
- unsigned Val) {
- auto Node = MsgPackDoc.getMapNode();
+// Set the stack frame size of a function in the metadata.
+void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
+ unsigned Val) {
+ auto Node = getShaderFunction(MF.getFunction().getName());
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
- getShaderFunctions()[MF.getFunction().getName()] = Node;
}
// Set the hardware register bit in PAL metadata to enable wave32 on the
@@ -747,6 +746,12 @@ msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() {
return ShaderFunctions.getMap();
}
+// Get (create if necessary) a function in the shader functions map.
+msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunction(StringRef Name) {
+ auto Functions = getShaderFunctions();
+ return Functions[Name].getMap(/*Convert=*/true);
+}
+
// Return the PAL metadata hardware shader stage name.
static const char *getStageName(CallingConv::ID CC) {
switch (CC) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 3b1767bb1f64..a17825edccd3 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -79,7 +79,7 @@ class AMDGPUPALMetadata {
void setScratchSize(unsigned CC, unsigned Val);
// Set the stack frame size of a function in the metadata.
- void setStackFrameSize(const MachineFunction &MF, unsigned Val);
+ void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
@@ -130,6 +130,9 @@ class AMDGPUPALMetadata {
// Get (create if necessary) the shader functions map.
msgpack::MapDocNode getShaderFunctions();
+ // Get (create if necessary) a function in the shader functions map.
+ msgpack::MapDocNode getShaderFunction(StringRef Name);
+
// Get (create if necessary) the .hardware_stages entry for the given calling
// convention.
msgpack::MapDocNode getHwStage(unsigned CC);
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index c6a065e1e65f..0ffe97fd28b3 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -1,5 +1,4 @@
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
@@ -126,10 +125,29 @@ define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
ret float %add
}
+@lds = internal addrspace(3) global [64 x float] undef
+
+define amdgpu_gfx float @simple_lds(float %arg0) #0 {
+ %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
+ %val = load float, float addrspace(3)* %lds_ptr
+ ret float %val
+}
+
+define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
+ %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
+ %val = load float, float addrspace(3)* %lds_ptr
+ %res = call amdgpu_gfx float @simple_lds_recurse(float %val)
+ ret float %res
+}
+
attributes #0 = { nounwind }
; GCN: amdpal.pipelines:
-; GCN-NEXT: - .registers: {}
+; GCN-NEXT: - .registers:
+; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
+; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
+; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01cf{{$}}
+; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
@@ -148,6 +166,10 @@ attributes #0 = { nounwind }
; GISEL-NEXT: .stack_frame_size_in_bytes: 0xd0{{$}}
; GCN-NEXT: no_stack_indirect_call:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: simple_lds:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT: simple_lds_recurse:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: simple_stack:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
; GCN-NEXT: simple_stack_call:
More information about the llvm-branch-commits
mailing list