[llvm] r363602 - AMDGPU/GFX10: Don't generate s_code_end padding in the asm-printer
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 12:28:44 PDT 2019
Author: nha
Date: Mon Jun 17 12:28:43 2019
New Revision: 363602
URL: http://llvm.org/viewvc/llvm-project?rev=363602&view=rev
Log:
AMDGPU/GFX10: Don't generate s_code_end padding in the asm-printer
Summary:
The purpose of the padding is to guard against stale code being
fetched into the instruction cache by the lowest level prefetching.
We're generating relocatable ELF here, and so the padding should
arguably be added by the linker. This is in fact what Mesa does.
This also fixes multi-part shaders for Mesa.
Change-Id: I6bfede58f20e9f337762ccf39ef9e0e263e69e82
Reviewers: arsenm, rampitec, t-tye
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63427
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/test/CodeGen/AMDGPU/s_code_end.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=363602&r1=363601&r2=363602&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Mon Jun 17 12:28:43 2019
@@ -309,7 +309,13 @@ void AMDGPUAsmPrinter::EmitGlobalVariabl
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
CallGraphResourceInfo.clear();
- if (AMDGPU::isGFX10(*getGlobalSTI())) {
+ // Pad with s_code_end to help tools and guard against instruction prefetch
+ // causing stale data in caches. Arguably this should be done by the linker,
+ // which is why this isn't done for Mesa.
+ const MCSubtargetInfo &STI = *getGlobalSTI();
+ if (AMDGPU::isGFX10(STI) &&
+ (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
+ STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
getTargetStreamer()->EmitCodeEnd();
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/s_code_end.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/s_code_end.ll?rev=363602&r1=363601&r2=363602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/s_code_end.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/s_code_end.ll Mon Jun 17 12:28:43 2019
@@ -1,11 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10,GFX10-ASM %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10,GFX10-OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10END,GFX10END-OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10NOEND,GFX10NOEND-OBJ %s
; GCN: a_kernel1:
-; GCN-NEXT: s_endpgm
-; GCN-ASM-NEXT: [[END_LABEL1:\.Lfunc_end.*]]:
+; GCN: s_endpgm
+; GCN-ASM: [[END_LABEL1:\.Lfunc_end.*]]:
; GCN-ASM-NEXT: .size a_kernel1, [[END_LABEL1]]-a_kernel1
-; GCN-ASM: .section .AMDGPU.config
; GCN-OBJ-NEXT: s_nop 0
@@ -14,19 +16,17 @@ define amdgpu_kernel void @a_kernel1() {
}
; GCN: a_kernel2:
-; GCN-NEXT: s_endpgm
-; GCN-ASM-NEXT: [[END_LABEL2:\.Lfunc_end.*]]:
+; GCN: s_endpgm
+; GCN-ASM: [[END_LABEL2:\.Lfunc_end.*]]:
; GCN-ASM-NEXT: .size a_kernel2, [[END_LABEL2]]-a_kernel2
-; GCN-ASM: .section .AMDGPU.config
-; GCN-OBJ-NEXT: {{^$}}
+; GCN-OBJ: {{^$}}
define amdgpu_kernel void @a_kernel2() {
ret void
}
-; GCN-ASM: .text
-; GCN-ASM-NEXT: .globl a_function
+; GCN-ASM: .globl a_function
; GCN-ASM-NEXT: .p2align 2
; GCN-ASM-NEXT: .type a_function,@function
@@ -34,46 +34,48 @@ define amdgpu_kernel void @a_kernel2() {
; GCN: s_setpc_b64
; GCN-ASM-NEXT: [[END_LABEL3:\.Lfunc_end.*]]:
; GCN-ASM-NEXT: .size a_function, [[END_LABEL3]]-a_function
-; GFX10-ASM: .p2alignl 6, 3214868480
-; GFX10-ASM-NEXT: .fill 32, 4, 3214868480
-
-; GFX10-OBJ-NEXT: s_code_end
-
-; GFX10-OBJ: s_code_end // 000000000140:
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
+; GFX10END-ASM: .p2alignl 6, 3214868480
+; GFX10END-ASM-NEXT: .fill 32, 4, 3214868480
+; GFX10NOEND-NOT: .fill
+
+; GFX10NOEND-OBJ-NOT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+
+; GFX10END-OBJ: s_code_end // 000000000140:
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
define void @a_function() {
ret void
More information about the llvm-commits
mailing list