[llvm] r293549 - [AMDGPU] Internalize non-kernel symbols

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 30 13:05:19 PST 2017


Author: rampitec
Date: Mon Jan 30 15:05:18 2017
New Revision: 293549

URL: http://llvm.org/viewvc/llvm-project?rev=293549&view=rev
Log:
[AMDGPU] Internalize non-kernel symbols

Since we have no call support and no late linking, we only need to
produce code for symbols that are actually used. This saves compilation
time, the size of the final executable, and the size of any intermediate
dumps.

Run the Internalize pass early in the opt pipeline, followed by the global
DCE pass. To enable it, RT can pass the -amdgpu-internalize-symbols option.

Differential Revision: https://reviews.llvm.org/D29214

Added:
    llvm/trunk/test/CodeGen/AMDGPU/internalize.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=293549&r1=293548&r2=293549&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Jan 30 15:05:18 2017
@@ -84,6 +84,13 @@ static cl::opt<bool> ScalarizeGlobal(
   cl::init(false),
   cl::Hidden);
 
+// Option to run internalize pass.
+static cl::opt<bool> InternalizeSymbols(
+  "amdgpu-internalize-symbols",
+  cl::desc("Enable elimination of non-kernel functions and unused globals"),
+  cl::init(false),
+  cl::Hidden);
+
 extern "C" void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -207,11 +214,35 @@ StringRef AMDGPUTargetMachine::getFeatur
 }
 
 void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+  bool Internalize = InternalizeSymbols &&
+                     (getOptLevel() > CodeGenOpt::None) &&
+                     (getTargetTriple().getArch() == Triple::amdgcn);
   Builder.addExtension(
     PassManagerBuilder::EP_ModuleOptimizerEarly,
-    [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+    [Internalize](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
       PM.add(createAMDGPUUnifyMetadataPass());
-    });
+      if (Internalize) {
+        PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
+          if (const Function *F = dyn_cast<Function>(&GV)) {
+            if (F->isDeclaration())
+                return true;
+            switch (F->getCallingConv()) {
+            default:
+              return false;
+            case CallingConv::AMDGPU_VS:
+            case CallingConv::AMDGPU_GS:
+            case CallingConv::AMDGPU_PS:
+            case CallingConv::AMDGPU_CS:
+            case CallingConv::AMDGPU_KERNEL:
+            case CallingConv::SPIR_KERNEL:
+              return true;
+            }
+          }
+          return !GV.use_empty();
+        }));
+        PM.add(createGlobalDCEPass());
+      }
+  });
 }
 
 //===----------------------------------------------------------------------===//

Added: llvm/trunk/test/CodeGen/AMDGPU/internalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/internalize.ll?rev=293549&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/internalize.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/internalize.ll Mon Jan 30 15:05:18 2017
@@ -0,0 +1,35 @@
+; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck %s
+; CHECK-NOT: unused
+; CHECK-NOT: foo_used
+; CHECK: gvar_used
+; CHECK: main_kernel
+
+@gvar_unused = addrspace(1) global i32 undef, align 4
+@gvar_used = addrspace(1) global i32 undef, align 4
+
+; Function Attrs: alwaysinline nounwind
+define void @foo_unused(i32 addrspace(1)* %out) local_unnamed_addr #1 {
+entry:
+  store i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Function Attrs: alwaysinline nounwind
+define void @foo_used(i32 addrspace(1)* %out, i32 %tid) local_unnamed_addr #1 {
+entry:
+  store i32 %tid, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @main_kernel() {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) nounwind
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
+
+attributes #1 = { alwaysinline nounwind }




More information about the llvm-commits mailing list