[llvm] r313616 - AMDGPU: Run internalize symbols at -O0
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 19 00:40:11 PDT 2017
Author: arsenm
Date: Tue Sep 19 00:40:11 2017
New Revision: 313616
URL: http://llvm.org/viewvc/llvm-project?rev=313616&view=rev
Log:
AMDGPU: Run internalize symbols at -O0
The relocations used for externally visible functions
aren't supported, so the direct call that is emitted
ends up hitting a linker error.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/test/CodeGen/AMDGPU/internalize.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=313616&r1=313615&r2=313616&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Tue Sep 19 00:40:11 2017
@@ -319,16 +319,34 @@ static ImmutablePass *createAMDGPUExtern
});
}
+/// Predicate for Internalize pass.
+bool mustPreserveGV(const GlobalValue &GV) {
+ if (const Function *F = dyn_cast<Function>(&GV))
+ return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
+
+ return !GV.use_empty();
+}
+
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.DivergentTarget = true;
bool EnableOpt = getOptLevel() > CodeGenOpt::None;
- bool Internalize = InternalizeSymbols && EnableOpt &&
- (getTargetTriple().getArch() == Triple::amdgcn);
+ bool Internalize = InternalizeSymbols;
bool EarlyInline = EarlyInlineAll && EnableOpt;
bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
+ if (Internalize) {
+ // If we're generating code, we always have the whole program available. The
+ // relocations expected for externally visible functions aren't supported,
+ // so make sure every non-entry function is hidden.
+ Builder.addExtension(
+ PassManagerBuilder::EP_EnabledOnOptLevel0,
+ [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createInternalizePass(mustPreserveGV));
+ });
+ }
+
Builder.addExtension(
PassManagerBuilder::EP_ModuleOptimizerEarly,
[Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
@@ -339,25 +357,7 @@ void AMDGPUTargetMachine::adjustPassMana
}
PM.add(createAMDGPUUnifyMetadataPass());
if (Internalize) {
- PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
- if (const Function *F = dyn_cast<Function>(&GV)) {
- if (F->isDeclaration())
- return true;
- switch (F->getCallingConv()) {
- default:
- return false;
- case CallingConv::AMDGPU_VS:
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_GS:
- case CallingConv::AMDGPU_PS:
- case CallingConv::AMDGPU_CS:
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::SPIR_KERNEL:
- return true;
- }
- }
- return !GV.use_empty();
- }));
+ PM.add(createInternalizePass(mustPreserveGV));
PM.add(createGlobalDCEPass());
}
if (EarlyInline)
Modified: llvm/trunk/test/CodeGen/AMDGPU/internalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/internalize.ll?rev=313616&r1=313615&r2=313616&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/internalize.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/internalize.ll Tue Sep 19 00:40:11 2017
@@ -1,35 +1,68 @@
-; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck %s
-; CHECK-NOT: unused
-; CHECK-NOT: foo_used
-; CHECK: gvar_used
-; CHECK: main_kernel
+; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s
+; RUN: opt -O0 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPTNONE %s
+; OPT-NOT: gvar_unused
+; OPTNONE: gvar_unused
@gvar_unused = addrspace(1) global i32 undef, align 4
+
+; ALL: gvar_used
@gvar_used = addrspace(1) global i32 undef, align 4
-; Function Attrs: alwaysinline nounwind
-define amdgpu_kernel void @foo_unused(i32 addrspace(1)* %out) local_unnamed_addr #1 {
+; ALL: define internal fastcc void @func_used(
+define fastcc void @func_used(i32 addrspace(1)* %out, i32 %tid) #1 {
+entry:
+ store volatile i32 %tid, i32 addrspace(1)* %out
+ ret void
+}
+
+; ALL: define internal fastcc void @func_used_noinline(
+define fastcc void @func_used_noinline(i32 addrspace(1)* %out, i32 %tid) #2 {
+entry:
+ store volatile i32 %tid, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPTNONE: define internal fastcc void @func_used_alwaysinline(
+; OPT-NOT: @func_used_alwaysinline
+define fastcc void @func_used_alwaysinline(i32 addrspace(1)* %out, i32 %tid) #3 {
entry:
- store i32 1, i32 addrspace(1)* %out
+ store volatile i32 %tid, i32 addrspace(1)* %out
ret void
}
-; Function Attrs: alwaysinline nounwind
-define amdgpu_kernel void @foo_used(i32 addrspace(1)* %out, i32 %tid) local_unnamed_addr #1 {
+; OPTNONE: define internal void @func_unused(
+; OPT-NOT: @func_unused
+define void @func_unused(i32 addrspace(1)* %out, i32 %tid) #2 {
entry:
- store i32 %tid, i32 addrspace(1)* %out
+ store volatile i32 %tid, i32 addrspace(1)* %out
ret void
}
+; ALL: define amdgpu_kernel void @kernel_unused(
+define amdgpu_kernel void @kernel_unused(i32 addrspace(1)* %out) #1 {
+entry:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; ALL: define amdgpu_kernel void @main_kernel()
+; ALL: tail call i32 @llvm.amdgcn.workitem.id.x
+; ALL: tail call fastcc void @func_used
+; ALL: tail call fastcc void @func_used_noinline
+; ALL: store volatile
+; ALL: ret void
define amdgpu_kernel void @main_kernel() {
entry:
- %tid = call i32 @llvm.amdgcn.workitem.id.x()
- tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) nounwind
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+ tail call fastcc void @func_used(i32 addrspace(1)* @gvar_used, i32 %tid)
+ tail call fastcc void @func_used_noinline(i32 addrspace(1)* @gvar_used, i32 %tid)
+ tail call fastcc void @func_used_alwaysinline(i32 addrspace(1)* @gvar_used, i32 %tid)
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
-
-attributes #1 = { alwaysinline nounwind }
+attributes #1 = { nounwind }
+attributes #2 = { noinline nounwind }
+attributes #3 = { alwaysinline nounwind }
More information about the llvm-commits
mailing list