[PATCH] D29214: [AMDGPU] Internalize non-kernel symbols
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 30 13:16:31 PST 2017
This revision was automatically updated to reflect the committed changes.
Closed by commit rL293549: [AMDGPU] Internalize non-kernel symbols (authored by rampitec).
Changed prior to commit:
https://reviews.llvm.org/D29214?vs=86158&id=86336#toc
Repository:
rL LLVM
https://reviews.llvm.org/D29214
Files:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/test/CodeGen/AMDGPU/internalize.ll
Index: llvm/trunk/test/CodeGen/AMDGPU/internalize.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/internalize.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/internalize.ll
@@ -0,0 +1,35 @@
+; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck %s
+; CHECK-NOT: unused
+; CHECK-NOT: foo_used
+; CHECK: gvar_used
+; CHECK: main_kernel
+
+@gvar_unused = addrspace(1) global i32 undef, align 4
+@gvar_used = addrspace(1) global i32 undef, align 4
+
+; Function Attrs: alwaysinline nounwind
+define void @foo_unused(i32 addrspace(1)* %out) local_unnamed_addr #1 {
+entry:
+ store i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; Function Attrs: alwaysinline nounwind
+define void @foo_used(i32 addrspace(1)* %out, i32 %tid) local_unnamed_addr #1 {
+entry:
+ store i32 %tid, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @main_kernel() {
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) nounwind
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
+
+attributes #1 = { alwaysinline nounwind }
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -84,6 +84,13 @@
cl::init(false),
cl::Hidden);
+// Option to run internalize pass.
+static cl::opt<bool> InternalizeSymbols(
+ "amdgpu-internalize-symbols",
+ cl::desc("Enable elimination of non-kernel functions and unused globals"),
+ cl::init(false),
+ cl::Hidden);
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -207,11 +214,35 @@
}
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ bool Internalize = InternalizeSymbols &&
+ (getOptLevel() > CodeGenOpt::None) &&
+ (getTargetTriple().getArch() == Triple::amdgcn);
Builder.addExtension(
PassManagerBuilder::EP_ModuleOptimizerEarly,
- [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ [Internalize](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
PM.add(createAMDGPUUnifyMetadataPass());
- });
+ if (Internalize) {
+ PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
+ if (const Function *F = dyn_cast<Function>(&GV)) {
+ if (F->isDeclaration())
+ return true;
+ switch (F->getCallingConv()) {
+ default:
+ return false;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ }
+ }
+ return !GV.use_empty();
+ }));
+ PM.add(createGlobalDCEPass());
+ }
+ });
}
//===----------------------------------------------------------------------===//
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D29214.86336.patch
Type: text/x-patch
Size: 3282 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170130/dbea2ab4/attachment.bin>
More information about the llvm-commits
mailing list