[PATCH] D80338: [AMDGPU] GFX10 PAL: Only pad with s_code_end in pipeline compilation

Wed May 20 16:33:45 PDT 2020

tpr created this revision.
Herald added subscribers: llvm-commits, kerbowa, hiraditya, t-tye, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.
tpr added reviewers: nhaehnle, rampitec.

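For PAL on GFX10, s_code_end padding is now added only for a full pipeline
compilation (the module contains a CS, or a PS together with a GS or VS),
where there is no later link step. It is no longer added when compiling an
individual shader.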

Change-Id: I0d4fa364178a79c47d8e22177c424722e3c9686d


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D80338

Files:
  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
  llvm/test/CodeGen/AMDGPU/s_code_end.ll
  llvm/test/CodeGen/AMDGPU/s_code_end_pal_pipeline.ll


Index: llvm/test/CodeGen/AMDGPU/s_code_end_pal_pipeline.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/s_code_end_pal_pipeline.ll
+++ llvm/test/CodeGen/AMDGPU/s_code_end_pal_pipeline.ll
@@ -1,8 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx1010 -d - | FileCheck --check-prefixes=GCN,GCN-OBJ,GFX10END,GFX10END-OBJ %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx1010 -d - | FileCheck --check-prefixes=GCN,GCN-OBJ,GFX10NOEND,GFX10NOEND-OBJ %s
 
 ; GCN:            a_kernel1{{>?}}:
 ; GCN:                    s_endpgm
@@ -11,7 +7,7 @@
 
 ; GCN-OBJ-NEXT:           s_nop 0
 
-define amdgpu_kernel void @a_kernel1() {
+define amdgpu_vs void @a_kernel1() {
   ret void
 }
 
@@ -22,7 +18,7 @@
 
 ; GCN-OBJ:   {{^$}}
 
-define amdgpu_kernel void @a_kernel2() {
+define amdgpu_ps void @a_kernel2() {
   ret void
 }
 
Index: llvm/test/CodeGen/AMDGPU/s_code_end.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/s_code_end.ll
+++ llvm/test/CodeGen/AMDGPU/s_code_end.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx1010 -d - | FileCheck --check-prefixes=GCN,GCN-OBJ,GFX10END,GFX10END-OBJ %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx1010 -d - | FileCheck --check-prefixes=GCN,GCN-OBJ,GFX10NOEND,GFX10NOEND-OBJ %s
 
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -320,11 +320,35 @@
   // causing stale data in caches. Arguably this should be done by the linker,
   // which is why this isn't done for Mesa.
   const MCSubtargetInfo &STI = *getGlobalSTI();
-  if (AMDGPU::isGFX10(STI) &&
-      (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
-       STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
-    OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-    getTargetStreamer()->EmitCodeEnd();
+  if (AMDGPU::isGFX10(STI)) {
+    bool AddGuard = STI.getTargetTriple().getOS() == Triple::AMDHSA;
+    if (!AddGuard && STI.getTargetTriple().getOS() == Triple::AMDPAL) {
+      // On PAL, we add the guard only if doing a full pipeline compile, as
+      // in that case there is no later link step. We spot a full pipeline
+      // compile by there being a CS, or a PS and at least one of GS or VS.
+      bool GotPs = false, GotGsOrVs = false, GotCs = false;
+      for (Function &F : M) {
+        if (!F.isDeclaration()) {
+          switch (F.getCallingConv()) {
+          case CallingConv::AMDGPU_CS:
+            GotCs = true;
+            break;
+          case CallingConv::AMDGPU_PS:
+            GotPs = true;
+            break;
+          case CallingConv::AMDGPU_GS:
+          case CallingConv::AMDGPU_VS:
+            GotGsOrVs = true;
+            break;
+          }
+        }
+      }
+      AddGuard = (GotPs && GotGsOrVs) || GotCs;
+    }
+    if (AddGuard) {
+      OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+      getTargetStreamer()->EmitCodeEnd();
+    }
   }
 
   return AsmPrinter::doFinalization(M);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80338.265377.patch
Type: text/x-patch
Size: 4390 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200520/018d6447/attachment.bin>