[PATCH] D31480: AMDGPU: Don't emit amd_kernel_code_t for callable functions

Wed Mar 29 15:21:49 PDT 2017

arsenm created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.

The amd_kernel_code_t header is inserted directly in the text
section, so for a callable function the relocation ends up
resolving to the beginning of the amd_kernel_code_t header
rather than to the actual function entry point.

      

Also skip some of the initialization comments
that only make sense for kernels.


https://reviews.llvm.org/D31480

Files:
  lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
  test/CodeGen/AMDGPU/hsa-func.ll


Index: test/CodeGen/AMDGPU/hsa-func.ll
===================================================================

--- test/CodeGen/AMDGPU/hsa-func.ll
+++ test/CodeGen/AMDGPU/hsa-func.ll
@@ -27,7 +27,7 @@
 
 ; ELF: Symbol {
 ; ELF: Name: simple
-; ELF: Size: 304
+; ELF: Size: 48
 ; ELF: Type: Function (0x2)
 ; ELF: }
 
@@ -40,10 +40,7 @@
 ; HSA: .globl simple
 ; HSA: .p2align 2
 ; HSA: {{^}}simple:
-; HSA: .amd_kernel_code_t
-; HSA: enable_sgpr_private_segment_buffer = 0
-; HSA: enable_sgpr_kernarg_segment_ptr = 0
-; HSA: .end_amd_kernel_code_t
+; HSA-NOT: amd_kernel_code_t
 ; HSA-NOT: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
 
 ; Make sure we are setting the ATC bit:
@@ -55,7 +52,8 @@
 
 ; HSA: .Lfunc_end0:
 ; HSA: .size   simple, .Lfunc_end0-simple
-
+; HSA: ; Function info:
+; HSA-NOT: COMPUTE_PGM_RSRC2
 define void @simple(i32 addrspace(1)* addrspace(2)* %ptr.out) {
 entry:
   %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -144,6 +144,10 @@
 }
 
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
+  const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
+  if (!MFI->isEntryFunction())
+    return;
+
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   SIProgramInfo KernelInfo;
   amd_kernel_code_t KernelCode;
@@ -222,13 +226,19 @@
     OutStreamer->SwitchSection(CommentSection);
 
     if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-      OutStreamer->emitRawComment(" Kernel info:", false);
+      if (MFI->isEntryFunction()) {
+        OutStreamer->emitRawComment(" Kernel info:", false);
+      } else {
+        OutStreamer->emitRawComment(" Function info:", false);
+      }
+
       OutStreamer->emitRawComment(" codeLenInByte = " +
                                   Twine(getFunctionCodeSize(MF)), false);
       OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
                                   false);
       OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
                                   false);
+
       OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
                                   false);
       OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
@@ -238,6 +248,9 @@
       OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
                                   " bytes/workgroup (compile time only)", false);
 
+      if (!MFI->isEntryFunction())
+        return false;
+
       OutStreamer->emitRawComment(" SGPRBlocks: " +
                                   Twine(KernelInfo.SGPRBlocks), false);
       OutStreamer->emitRawComment(" VGPRBlocks: " +


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D31480.93416.patch
Type: text/x-patch
Size: 2894 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170329/7a37622d/attachment.bin>