[llvm] r309398 - AMDGPU: Annotate implicitarg.ptr usage

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 28 08:52:08 PDT 2017


Author: arsenm
Date: Fri Jul 28 08:52:08 2017
New Revision: 309398

URL: http://llvm.org/viewvc/llvm-project?rev=309398&view=rev
Log:
AMDGPU: Annotate implicitarg.ptr usage

Non-kernel functions need to be passed something in order for
llvm.amdgcn.implicitarg.ptr to work in them. The pointer cannot
be derived from the kernarg segment pointer alone, because the
implicit arguments are placed after the kernel arguments.

Also adds the previously missing test for the intrinsic.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
    llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
    llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp?rev=309398&r1=309397&r2=309398&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp Fri Jul 28 08:52:08 2017
@@ -156,8 +156,9 @@ static StringRef intrinsicToAttrName(Int
   case Intrinsic::amdgcn_dispatch_id:
     return "amdgpu-dispatch-id";
   case Intrinsic::amdgcn_kernarg_segment_ptr:
-  case Intrinsic::amdgcn_implicitarg_ptr:
     return "amdgpu-kernarg-segment-ptr";
+  case Intrinsic::amdgcn_implicitarg_ptr:
+    return "amdgpu-implicitarg-ptr";
   case Intrinsic::amdgcn_queue_ptr:
   case Intrinsic::trap:
   case Intrinsic::debugtrap:
@@ -190,7 +191,8 @@ static void copyFeaturesToFunction(Funct
     { "amdgpu-work-group-id-z" },
     { "amdgpu-dispatch-ptr" },
     { "amdgpu-dispatch-id" },
-    { "amdgpu-kernarg-segment-ptr" }
+    { "amdgpu-kernarg-segment-ptr" },
+    { "amdgpu-implicitarg-ptr" }
   };
 
   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=309398&r1=309397&r2=309398&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Jul 28 08:52:08 2017
@@ -764,7 +764,8 @@ public:
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
-  unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
+  unsigned getKernArgSegmentSize(const MachineFunction &MF,
+                                 unsigned ExplictArgBytes) const;
 
   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=309398&r1=309397&r2=309398&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jul 28 08:52:08 2017
@@ -899,6 +899,13 @@ SDValue SITargetLowering::lowerKernArgPa
                      DAG.getConstant(Offset, SL, PtrVT));
 }
 
+SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
+                                            const SDLoc &SL) const {
+  auto MFI = DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+  uint64_t Offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+  return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
+}
+
 SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                          const SDLoc &SL, SDValue Val,
                                          bool Signed,
@@ -3029,8 +3036,9 @@ SDValue SITargetLowering::LowerINTRINSIC
                                 TRI->getPreloadedValue(MF, Reg), VT);
   }
   case Intrinsic::amdgcn_implicitarg_ptr: {
-    unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
-    return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+    if (MFI->isEntryFunction())
+      return getImplicitArgPtr(DAG, DL);
+    report_fatal_error("amdgcn.implicitarg.ptr not implemented for functions");
   }
   case Intrinsic::amdgcn_kernarg_segment_ptr: {
     unsigned Reg

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=309398&r1=309397&r2=309398&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Fri Jul 28 08:52:08 2017
@@ -23,6 +23,7 @@ namespace llvm {
 class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
                                    SDValue Chain, uint64_t Offset) const;
+  SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
   SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                    const SDLoc &SL, SDValue Chain,
                                    uint64_t Offset, bool Signed,

Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=309398&r1=309397&r2=309398&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Fri Jul 28 08:52:08 2017
@@ -93,11 +93,17 @@ SIMachineFunctionInfo::SIMachineFunction
 
     // FIXME: Not really a system SGPR.
     PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+      ImplicitArgPtr = true;
+  } else {
+    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+      KernargSegmentPtr = true;
   }
 
   CallingConv::ID CC = F->getCallingConv();
   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
-    KernargSegmentPtr = !F->arg_empty();
+    if (!F->arg_empty())
+      KernargSegmentPtr = true;
     WorkGroupIDX = true;
     WorkItemIDX = true;
   } else if (CC == CallingConv::AMDGPU_PS) {

Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=309398&r1=309397&r2=309398&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Fri Jul 28 08:52:08 2017
@@ -186,6 +186,10 @@ private:
   // Other shaders indirect 64-bits at sgpr[0:1]
   bool ImplicitBufferPtr : 1;
 
+  // Pointer to where the ABI inserts special kernel arguments separate from the
+  // user arguments. This is an offset from the KernargSegmentPtr.
+  bool ImplicitArgPtr : 1;
+
   MCPhysReg getNextUserSGPR() const {
     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -346,6 +350,10 @@ public:
     return WorkItemIDZ;
   }
 
+  bool hasImplicitArgPtr() const {
+    return ImplicitArgPtr;
+  }
+
   bool hasImplicitBufferPtr() const {
     return ImplicitBufferPtr;
   }

Modified: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll?rev=309398&r1=309397&r2=309398&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll Fri Jul 28 08:52:08 2017
@@ -237,52 +237,59 @@ define void @func_indirect_use_kernarg_s
   ret void
 }
 
-; HSA: define void @use_implicitarg_ptr() #14 {
+; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 {
+define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
+  %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+  store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
+  ret void
+}
+
+; HSA: define void @use_implicitarg_ptr() #15 {
 define void @use_implicitarg_ptr() #1 {
   %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
   store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
   ret void
 }
 
-; HSA: define void @func_indirect_use_implicitarg_ptr() #14 {
+; HSA: define void @func_indirect_use_implicitarg_ptr() #15 {
 define void @func_indirect_use_implicitarg_ptr() #1 {
   call void @use_implicitarg_ptr()
   ret void
 }
 
-; HSA: declare void @external.func() #15
+; HSA: declare void @external.func() #16
 declare void @external.func() #3
 
-; HSA: define internal void @defined.func() #15 {
+; HSA: define internal void @defined.func() #16 {
 define internal void @defined.func() #3 {
   ret void
 }
 
-; HSA: define void @func_call_external() #15 {
+; HSA: define void @func_call_external() #16 {
 define void @func_call_external() #3 {
   call void @external.func()
   ret void
 }
 
-; HSA: define void @func_call_defined() #15 {
+; HSA: define void @func_call_defined() #16 {
 define void @func_call_defined() #3 {
   call void @defined.func()
   ret void
 }
 
-; HSA: define void @func_call_asm() #15 {
+; HSA: define void @func_call_asm() #16 {
 define void @func_call_asm() #3 {
   call void asm sideeffect "", ""() #3
   ret void
 }
 
-; HSA: define amdgpu_kernel void @kern_call_external() #16 {
+; HSA: define amdgpu_kernel void @kern_call_external() #17 {
 define amdgpu_kernel void @kern_call_external() #3 {
   call void @external.func()
   ret void
 }
 
-; HSA: define amdgpu_kernel void @func_kern_defined() #16 {
+; HSA: define amdgpu_kernel void @func_kern_defined() #17 {
 define amdgpu_kernel void @func_kern_defined() #3 {
   call void @defined.func()
   ret void
@@ -308,5 +315,6 @@ attributes #3 = { nounwind }
 ; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
 ; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
 ; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
-; HSA: attributes #15 = { nounwind }
-; HSA: attributes #16 = { nounwind "amdgpu-flat-scratch" }
+; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
+; HSA: attributes #16 = { nounwind }
+; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" }

Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll?rev=309398&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll Fri Jul 28 08:52:08 2017
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-NOENV %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-OPENCL %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
+
+; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
+; GCN: enable_sgpr_kernarg_segment_ptr = 1
+
+; HSA-NOENV: kernarg_segment_byte_size = 0
+; HSA-OPENCL: kernarg_segment_byte_size = 32
+; MESA: kernarg_segment_byte_size = 16
+
+; HSA: s_load_dword s0, s[4:5], 0x0
+define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
+  %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+  %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
+  %load = load volatile i32, i32 addrspace(2)* %cast
+  ret void
+}
+
+; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
+; GCN: enable_sgpr_kernarg_segment_ptr = 1
+
+; HSA-NOENV: kernarg_segment_byte_size = 112
+; HSA-OPENCL: kernarg_segment_byte_size = 144
+; MESA: kernarg_segment_byte_size = 464
+
+; HSA: s_load_dword s0, s[4:5], 0x1c
+define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
+  %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+  %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
+  %load = load volatile i32, i32 addrspace(2)* %cast
+  ret void
+}
+
+declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #2
+
+attributes #0 = { nounwind noinline }
+attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind readnone speculatable }




More information about the llvm-commits mailing list