[llvm] r308326 - AMDGPU: Annotate necessity of flat-scratch-init
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 18 09:44:58 PDT 2017
Author: arsenm
Date: Tue Jul 18 09:44:58 2017
New Revision: 308326
URL: http://llvm.org/viewvc/llvm-project?rev=308326&view=rev
Log:
AMDGPU: Annotate necessity of flat-scratch-init
As an approximation of the existing handling to avoid
regressions. Fixes using too many registers with calls
on subtargets with the SGPR allocation bug.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp?rev=308326&r1=308325&r2=308326&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp Tue Jul 18 09:44:58 2017
@@ -201,11 +201,14 @@ static void copyFeaturesToFunction(Funct
}
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
- bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+ bool HasFlat = ST.hasFlatAddressSpace();
+ bool HasApertureRegs = ST.hasApertureRegs();
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
bool Changed = false;
bool NeedQueuePtr = false;
+ bool HaveCall = false;
bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
for (BasicBlock &BB : F) {
@@ -215,11 +218,15 @@ bool AMDGPUAnnotateKernelFeatures::addFe
Function *Callee = CS.getCalledFunction();
// TODO: Do something with indirect calls.
- if (!Callee)
+ if (!Callee) {
+ if (!CS.isInlineAsm())
+ HaveCall = true;
continue;
+ }
Intrinsic::ID IID = Callee->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic) {
+ HaveCall = true;
copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
Changed = true;
} else {
@@ -261,6 +268,14 @@ bool AMDGPUAnnotateKernelFeatures::addFe
Changed = true;
}
+ // TODO: We could refine this to captured pointers that could possibly be
+ // accessed by flat instructions. For now this is mostly a poor way of
+ // estimating whether there are calls before argument lowering.
+ if (HasFlat && !IsFunc && HaveCall) {
+ F.addFnAttr("amdgpu-flat-scratch");
+ Changed = true;
+ }
+
return Changed;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=308326&r1=308325&r2=308326&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Tue Jul 18 09:44:58 2017
@@ -246,7 +246,7 @@ void SIFrameLowering::emitEntryFunctionP
// this point it appears we need the setup. This part of the prolog should be
// emitted after frame indices are eliminated.
- if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
+ if (MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
unsigned SPReg = MFI->getStackPtrOffsetReg();
@@ -254,7 +254,8 @@ void SIFrameLowering::emitEntryFunctionP
assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
DebugLoc DL;
- int64_t StackSize = MF.getFrameInfo().getStackSize();
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ int64_t StackSize = FrameInfo.getStackSize();
if (StackSize == 0) {
BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=308326&r1=308325&r2=308326&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Tue Jul 18 09:44:58 2017
@@ -152,7 +152,8 @@ SIMachineFunctionInfo::SIMachineFunction
}
}
- if (ST.isAmdCodeObjectV2(MF)) {
+ bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+ if (IsCOV2) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -172,12 +173,12 @@ SIMachineFunctionInfo::SIMachineFunction
if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
- // We don't need to worry about accessing spills with flat instructions.
- // TODO: On VI where we must use flat for global, we should be able to omit
- // this if it is never used for generic access.
- if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS() &&
- isEntryFunction())
- FlatScratchInit = true;
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls that may require it before argument lowering.
+ if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
+ FlatScratchInit = true;
+ }
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
Modified: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll?rev=308326&r1=308325&r2=308326&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll Tue Jul 18 09:44:58 2017
@@ -250,9 +250,48 @@ define void @func_indirect_use_implicita
ret void
}
+; HSA: declare void @external.func() #15
+declare void @external.func() #3
+
+; HSA: define internal void @defined.func() #15 {
+define internal void @defined.func() #3 {
+ ret void
+}
+
+; HSA: define void @func_call_external() #15 {
+define void @func_call_external() #3 {
+ call void @external.func()
+ ret void
+}
+
+; HSA: define void @func_call_defined() #15 {
+define void @func_call_defined() #3 {
+ call void @defined.func()
+ ret void
+}
+
+; HSA: define void @func_call_asm() #15 {
+define void @func_call_asm() #3 {
+ call void asm sideeffect "", ""() #3
+ ret void
+}
+
+; HSA: define amdgpu_kernel void @kern_call_external() #16 {
+define amdgpu_kernel void @kern_call_external() #3 {
+ call void @external.func()
+ ret void
+}
+
+; HSA: define amdgpu_kernel void @func_kern_defined() #16 {
+define amdgpu_kernel void @func_kern_defined() #3 {
+ call void @defined.func()
+ ret void
+}
+
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind "target-cpu"="fiji" }
attributes #2 = { nounwind "target-cpu"="gfx900" }
+attributes #3 = { nounwind }
; HSA: attributes #0 = { nounwind readnone speculatable }
; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
@@ -269,3 +308,5 @@ attributes #2 = { nounwind "target-cpu"=
; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
+; HSA: attributes #15 = { nounwind }
+; HSA: attributes #16 = { nounwind "amdgpu-flat-scratch" }
More information about the llvm-commits
mailing list