[llvm] r282223 - AMDGPU/SI: Include implicit arguments in kernarg_segment_byte_size
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 22 18:33:27 PDT 2016
Author: tstellar
Date: Thu Sep 22 20:33:26 2016
New Revision: 282223
URL: http://llvm.org/viewvc/llvm-project?rev=282223&view=rev
Log:
AMDGPU/SI: Include implicit arguments in kernarg_segment_byte_size
Reviewers: arsenm
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, llvm-commits, tony-tye
Differential Revision: https://reviews.llvm.org/D24835
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=282223&r1=282222&r2=282223&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Thu Sep 22 20:33:26 2016
@@ -730,7 +730,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCode
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
// FIXME: Should use getKernArgSize
- header.kernarg_segment_byte_size = MFI->getABIArgOffset();
+ header.kernarg_segment_byte_size =
+ STM.getKernArgSegmentSize(MFI->getABIArgOffset());
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=282223&r1=282222&r2=282223&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Thu Sep 22 20:33:26 2016
@@ -297,6 +297,15 @@ bool SISubtarget::isVGPRSpillingEnabled(
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
+unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
+ unsigned ImplicitBytes = getImplicitArgNumBytes();
+ if (ImplicitBytes == 0)
+ return ExplicitArgBytes;
+
+ unsigned Alignment = getAlignmentForImplicitArgPtr();
+ return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+}
+
unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=282223&r1=282222&r2=282223&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Thu Sep 22 20:33:26 2016
@@ -142,6 +142,10 @@ public:
return TargetTriple.getOS() == Triple::Mesa3D;
}
+ bool isOpenCLEnv() const {
+ return TargetTriple.getEnvironment() == Triple::OpenCL;
+ }
+
Generation getGeneration() const {
return Gen;
}
@@ -288,6 +292,14 @@ public:
return isAmdHsaOS() ? 8 : 4;
}
+ unsigned getImplicitArgNumBytes() const {
+ if (isMesa3DOS())
+ return 16;
+ if (isAmdHsaOS() && isOpenCLEnv())
+ return 32;
+ return 0;
+ }
+
unsigned getStackAlignment() const {
// Scratch is allocated in 256 dword per wave blocks.
return 4 * 256 / getWavefrontSize();
@@ -521,6 +533,8 @@ public:
return SGPRInitBug;
}
+ unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
+
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll?rev=282223&r1=282222&r2=282223&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll Thu Sep 22 20:33:26 2016
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-NOENV %s
+; RUN: llc -mtriple=amdgcn--amdhsa-opencl -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-OPENCL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s
@@ -29,6 +30,9 @@ define void @test_implicit(i32 addrspace
}
; ALL-LABEL: {{^}}test_implicit_alignment
+; HSA-NOENV: kernarg_segment_byte_size = 10
+; HSA-OPENCL: kernarg_segment_byte_size = 48
+; OS-MESA3D: kernarg_segment_byte_size = 28
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
More information about the llvm-commits mailing list