[PATCH] D80338: [AMDGPU] GFX10 PAL: Only pad with s_code_end in pipeline compilation
Tim Renouf via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed May 20 16:33:45 PDT 2020
tpr created this revision.
Herald added subscribers: llvm-commits, kerbowa, hiraditya, t-tye, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.
tpr added reviewers: nhaehnle, rampitec.
For PAL, the GFX10 s_code_end padding is now added only for a full pipeline
compilation, not for a single-shader compilation: a full pipeline compile has
no later link step that could add the padding.
Change-Id: I0d4fa364178a79c47d8e22177c424722e3c9686d
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D80338
Files:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/test/CodeGen/AMDGPU/s_code_end.ll
llvm/test/CodeGen/AMDGPU/s_code_end_pal_pipeline.ll
Index: llvm/test/CodeGen/AMDGPU/s_code_end_pal_pipeline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/s_code_end_pal_pipeline.ll
+++ llvm/test/CodeGen/AMDGPU/s_code_end_pal_pipeline.ll
@@ -1,8 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx1010 -d - | FileCheck --check-prefixes=GCN,GCN-OBJ,GFX10END,GFX10END-OBJ %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx1010 -d - | FileCheck --check-prefixes=GCN,GCN-OBJ,GFX10NOEND,GFX10NOEND-OBJ %s
; GCN: a_kernel1{{>?}}:
; GCN: s_endpgm
@@ -11,7 +7,7 @@
; GCN-OBJ-NEXT: s_nop 0
-define amdgpu_kernel void @a_kernel1() {
+define amdgpu_vs void @a_kernel1() {
ret void
}
@@ -22,7 +18,7 @@
; GCN-OBJ: {{^$}}
-define amdgpu_kernel void @a_kernel2() {
+define amdgpu_ps void @a_kernel2() {
ret void
}
Index: llvm/test/CodeGen/AMDGPU/s_code_end.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/s_code_end.ll
+++ llvm/test/CodeGen/AMDGPU/s_code_end.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx1010 -d - | FileCheck --check-prefixes=GCN,GCN-OBJ,GFX10END,GFX10END-OBJ %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx1010 -d - | FileCheck --check-prefixes=GCN,GCN-OBJ,GFX10NOEND,GFX10NOEND-OBJ %s
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -320,11 +320,35 @@
// causing stale data in caches. Arguably this should be done by the linker,
// which is why this isn't done for Mesa.
const MCSubtargetInfo &STI = *getGlobalSTI();
- if (AMDGPU::isGFX10(STI) &&
- (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
- STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
- getTargetStreamer()->EmitCodeEnd();
+ if (AMDGPU::isGFX10(STI)) {
+ bool AddGuard = STI.getTargetTriple().getOS() == Triple::AMDHSA;
+ if (!AddGuard && STI.getTargetTriple().getOS() == Triple::AMDPAL) {
+ // On PAL, we add the guard only if doing a full pipeline compile, as
+ // there is no later link step. We can spot a full pipeline compile by
+ // there being a PS and at least one of GS or VS, or instead a CS.
+ bool GotPs = false, GotGsOrVs = false, GotCs = false;
+ for (Function &F : M) {
+ if (!F.isDeclaration()) {
+ switch (F.getCallingConv()) {
+ case CallingConv::AMDGPU_CS:
+ GotCs = true;
+ break;
+ case CallingConv::AMDGPU_PS:
+ GotPs = true;
+ break;
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_VS:
+ GotGsOrVs = true;
+ break;
+ }
+ }
+ }
+ AddGuard = (GotPs && GotGsOrVs) || GotCs;
+ }
+ if (AddGuard) {
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ getTargetStreamer()->EmitCodeEnd();
+ }
}
return AsmPrinter::doFinalization(M);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80338.265377.patch
Type: text/x-patch
Size: 4390 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200520/018d6447/attachment.bin>
More information about the llvm-commits
mailing list