[PATCH] R600: Make sure to inline all internal functions

Fri Oct 3 07:52:39 PDT 2014

Function calls aren't supported yet.
---
 lib/Target/R600/AMDGPUTargetMachine.cpp | 13 +++++++++++++
 test/CodeGen/R600/inline-calls.ll       | 17 +++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 test/CodeGen/R600/inline-calls.ll

diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index c95a941..4c366bd4 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -80,6 +80,7 @@ public:
     return nullptr;
   }
 
+  void addIRPasses() override;
   void addCodeGenPrepare() override;
   bool addPreISel() override;
   bool addInstSelector() override;
@@ -106,6 +107,18 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
   PM.add(createAMDGPUTargetTransformInfoPass(this));
 }
 
+void AMDGPUPassConfig::addIRPasses() {
+  // Function calls are not supported, so make sure we inline everything.
+  addPass(createFunctionInliningPass(100000000));
+  // We need to add the barrier noop pass, otherwise adding the function
+  // inlining pass will cause all of the PassConfig's passes to be run
+  // one function at a time, which means if we have a module with two
+  // functions, then we will generate code for the first function
+  // without ever running any passes on the second.
+  addPass(createBarrierNoopPass());
+  TargetPassConfig::addIRPasses();
+}
+
 void AMDGPUPassConfig::addCodeGenPrepare() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
   if (ST.isPromoteAllocaEnabled()) {
diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/R600/inline-calls.ll
new file mode 100644
index 0000000..a7ecc29
--- /dev/null
+++ b/test/CodeGen/R600/inline-calls.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck  %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-NOT: {{^}}func:
+define internal fastcc i32 @func(i32 %a) {
+entry:
+  %tmp0 = add i32 %a, 1
+  ret i32 %tmp0
+}
+
+; CHECK: {{^}}kernel:
+define void @kernel(i32 addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @func(i32 1)
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
-- 
1.8.5.5