[llvm] bb86220 - AMDGPU: Don't handle kernarg.segment.ptr in functions

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 13 12:51:43 PDT 2020


Author: Matt Arsenault
Date: 2020-03-13T12:51:12-07:00
New Revision: bb8622094d77417c629e45fc9964d0b699019f22

URL: https://github.com/llvm/llvm-project/commit/bb8622094d77417c629e45fc9964d0b699019f22
DIFF: https://github.com/llvm/llvm-project/commit/bb8622094d77417c629e45fc9964d0b699019f22.diff

LOG: AMDGPU: Don't handle kernarg.segment.ptr in functions

Just lower this to null. Pass implicitarg.ptr in its place in the
argument list.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
    llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
    llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index ab55ee45a6b1..8aafcd40e1cf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -216,7 +216,7 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
       "amdgpu-work-item-id-z",      "amdgpu-work-group-id-x",
       "amdgpu-work-group-id-y",     "amdgpu-work-group-id-z",
       "amdgpu-dispatch-ptr",        "amdgpu-dispatch-id",
-      "amdgpu-kernarg-segment-ptr", "amdgpu-implicitarg-ptr"};
+      "amdgpu-implicitarg-ptr"};
 
   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
     NeedQueuePtr = true;
@@ -305,11 +305,16 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
           Changed = true;
         } else {
           bool NonKernelOnly = false;
-          StringRef AttrName = intrinsicToAttrName(IID,
-                                                   NonKernelOnly, NeedQueuePtr);
-          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
-            F.addFnAttr(AttrName);
-            Changed = true;
+
+          if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
+            F.addFnAttr("amdgpu-kernarg-segment-ptr");
+          } else {
+            StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
+                                                     NeedQueuePtr);
+            if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
+              F.addFnAttr(AttrName);
+              Changed = true;
+            }
           }
         }
       }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 99dd11c11b66..4bf92904e1ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3747,6 +3747,14 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
     return false;
   }
   case Intrinsic::amdgcn_kernarg_segment_ptr:
+    if (!AMDGPU::isKernel(B.getMF().getFunction().getCallingConv())) {
+      B.setInstr(MI);
+      // This only makes sense to call in a kernel, so just lower to null.
+      B.buildConstant(MI.getOperand(0).getReg(), 0);
+      MI.eraseFromParent();
+      return true;
+    }
+
     return legalizePreloadedArgIntrin(
       MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
   case Intrinsic::amdgcn_implicitarg_ptr:

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2cc891a72665..c3efa23795f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1724,8 +1724,10 @@ void SITargetLowering::allocateSpecialInputSGPRs(
   if (Info.hasQueuePtr())
     ArgInfo.QueuePtr = allocateSGPR64Input(CCInfo);
 
-  if (Info.hasKernargSegmentPtr())
-    ArgInfo.KernargSegmentPtr = allocateSGPR64Input(CCInfo);
+  // Implicit arg ptr takes the place of the kernarg segment pointer. This is a
+  // constant offset from the kernarg segment.
+  if (Info.hasImplicitArgPtr())
+    ArgInfo.ImplicitArgPtr = allocateSGPR64Input(CCInfo);
 
   if (Info.hasDispatchID())
     ArgInfo.DispatchID = allocateSGPR64Input(CCInfo);
@@ -1740,9 +1742,6 @@ void SITargetLowering::allocateSpecialInputSGPRs(
 
   if (Info.hasWorkGroupIDZ())
     ArgInfo.WorkGroupIDZ = allocateSGPR32Input(CCInfo);
-
-  if (Info.hasImplicitArgPtr())
-    ArgInfo.ImplicitArgPtr = allocateSGPR64Input(CCInfo);
 }
 
 // Allocate special inputs passed in user SGPRs.
@@ -2448,12 +2447,11 @@ void SITargetLowering::passSpecialInputs(
   AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
     AMDGPUFunctionArgInfo::DISPATCH_PTR,
     AMDGPUFunctionArgInfo::QUEUE_PTR,
-    AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR,
+    AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
     AMDGPUFunctionArgInfo::DISPATCH_ID,
     AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
     AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
-    AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
-    AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR
+    AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
   };
 
   for (auto InputID : InputRegs) {
@@ -5735,6 +5733,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                              AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
   }
   case Intrinsic::amdgcn_kernarg_segment_ptr: {
+    if (!AMDGPU::isKernel(MF.getFunction().getCallingConv())) {
+      // This only makes sense to call in a kernel, so just lower to null.
+      return DAG.getConstant(0, DL, VT);
+    }
+
     return getPreloadedValue(DAG, *MFI, VT,
                              AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
index 28e4684fface..e3fd488af5e0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -114,6 +114,16 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_r
   ret void
 }
 
+; ALL-LABEL: {{^}}func_kernarg_segment_ptr:
+; ALL: s_mov_b32 [[S_LO:s[0-9]+]], 0{{$}}
+; ALL: s_mov_b32 [[S_HI:s[0-9]+]], 0{{$}}
+; ALL: v_mov_b32_e32 v0, [[S_LO]]{{$}}
+; ALL: v_mov_b32_e32 v1, [[S_HI]]{{$}}
+define i8 addrspace(4)* @func_kernarg_segment_ptr() {
+  %ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+  ret i8 addrspace(4)* %ptr
+}
+
 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
 declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
 

diff  --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 82ba28526d35..9ae7cc84167b 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -231,7 +231,7 @@ define void @use_kernarg_segment_ptr() #1 {
   ret void
 }
 
-; HSA: define void @func_indirect_use_kernarg_segment_ptr() #14 {
+; HSA: define void @func_indirect_use_kernarg_segment_ptr() #11 {
 define void @func_indirect_use_kernarg_segment_ptr() #1 {
   call void @use_kernarg_segment_ptr()
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
index 94adaed43b23..601ed9698c61 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -66,10 +66,10 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
   ret void
 }
 
+; Not really supported in callable functions.
 ; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
-; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s4
-; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s5
-; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0{{$}}
+; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0{{$}}
 define hidden void @use_kernarg_segment_ptr() #1 {
   %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
   %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
@@ -79,10 +79,6 @@ define hidden void @use_kernarg_segment_ptr() #1 {
 
 ; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; GCN-NOT: s[4:5]
-; GCN-NOT: s4
-; GCN-NOT: s5
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
   call void @use_kernarg_segment_ptr()
   ret void
@@ -437,9 +433,9 @@ define hidden void @use_every_sgpr_input() #1 {
   %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
   %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
 
-  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
-  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
-  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
+  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
+  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc
 
   %val3 = call i64 @llvm.amdgcn.dispatch.id()
   call void asm sideeffect "; use $0", "s"(i64 %val3)
@@ -521,9 +517,9 @@ define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
   %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
   %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
 
-  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
-  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
-  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
+  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
+  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc
 
   %val3 = call i64 @llvm.amdgcn.dispatch.id()
   call void asm sideeffect "; use $0", "s"(i64 %val3)
@@ -590,9 +586,9 @@ define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill()
   %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
   %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
 
-  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
-  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
-  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
+  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
+  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc
 
   %val3 = call i64 @llvm.amdgcn.dispatch.id()
   call void asm sideeffect "; use $0", "s"(i64 %val3)
@@ -614,6 +610,7 @@ declare i32 @llvm.amdgcn.workgroup.id.y() #0
 declare i32 @llvm.amdgcn.workgroup.id.z() #0
 declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
 declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
 declare i64 @llvm.amdgcn.dispatch.id() #0
 declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 0589da71fd62..851c3aa8ef49 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -165,18 +165,13 @@ define void @opencl_func_call_implicitarg_ptr_func() #0 {
 
 ; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
 ; GCN: s_waitcnt
-; MESA-DAG: v_mov_b32_e32 v0, s4
-; MESA-DAG: v_mov_b32_e32 v1, s5
-; MESA-DAG: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
-; MESA: v_mov_b32_e32 v0, s6
-; MESA: v_mov_b32_e32 v1, s7
+; GCN-DAG: v_mov_b32_e32 v0, s4
+; GCN-DAG: v_mov_b32_e32 v1, s5
+; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
+; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
+
 ; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
 
-; HSA: v_mov_b32_e32 v0, s4
-; HSA: v_mov_b32_e32 v1, s5
-; HSA: flat_load_dword v0, v[0:1]
-; HSA: v_mov_b32_e32 v0, s6
-; HSA: v_mov_b32_e32 v1, s7
 ; HSA: flat_load_dword v0, v[0:1]
 
 ; GCN: s_waitcnt vmcnt(0)
@@ -192,20 +187,12 @@ define void @func_kernarg_implicitarg_ptr() #0 {
 
 ; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
 ; GCN: s_waitcnt
-; MESA-DAG: v_mov_b32_e32 v0, s4
-; MESA-DAG: v_mov_b32_e32 v1, s5
-; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
-; MESA-DAG: v_mov_b32_e32 v0, s6
-; MESA-DAG: v_mov_b32_e32 v1, s7
-; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
-
+; GCN-DAG: v_mov_b32_e32 v0, s4
+; GCN-DAG: v_mov_b32_e32 v1, s5
+; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
+; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
 
-; HSA: v_mov_b32_e32 v0, s4
-; HSA: v_mov_b32_e32 v1, s5
-; HSA: flat_load_dword v0, v[0:1]
-
-; HSA: v_mov_b32_e32 v0, s6
-; HSA: v_mov_b32_e32 v1, s7
+; MESA: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
 ; HSA: flat_load_dword v0, v[0:1]
 
 ; GCN: s_waitcnt vmcnt(0)
@@ -220,8 +207,8 @@ define void @opencl_func_kernarg_implicitarg_ptr() #0 {
 }
 
 ; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
-; GCN: s_add_u32 s6, s4, 0x70
-; GCN: s_addc_u32 s7, s5, 0
+; GCN: s_add_u32 s4, s4, 0x70
+; GCN: s_addc_u32 s5, s5, 0
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
   call void @func_kernarg_implicitarg_ptr()


        


More information about the llvm-commits mailing list