[PATCH] D62917: [AMDGPU] Constrain the inliner on maximum number of basic blocks in a caller function
Valery Pykhtin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 5 10:04:24 PDT 2019
vpykhtin created this revision.
vpykhtin added reviewers: rampitec, arsenm.
Herald added subscribers: llvm-commits, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
This patch improves compile-time performance: compilation time for the LuxMark hotel scene is reduced from 250 to 6 seconds.
The actual check is performed last to give a chance to inline what has to be always inlined.
Repository:
rL LLVM
https://reviews.llvm.org/D62917
Files:
lib/Target/AMDGPU/AMDGPUInline.cpp
test/CodeGen/AMDGPU/inline-maxbb.ll
Index: test/CodeGen/AMDGPU/inline-maxbb.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/inline-maxbb.ll
@@ -0,0 +1,33 @@
+; RUN: opt -mtriple=amdgcn-- -O3 -S -amd-inline-max-bb=2 %s | FileCheck %s --check-prefix=NOINL
+; RUN: opt -mtriple=amdgcn-- -O3 -S -amd-inline-max-bb=3 %s | FileCheck %s --check-prefix=INL
+
+define i32 @callee(i32 %x) {
+entry:
+ %cc = icmp eq i32 %x, 1
+ br i1 %cc, label %ret_res, label %mulx
+
+mulx:
+ %mul1 = mul i32 %x, %x
+ %mul2 = mul i32 %mul1, %x
+ %mul3 = mul i32 %mul1, %mul2
+ %mul4 = mul i32 %mul3, %mul2
+ %mul5 = mul i32 %mul4, %mul3
+ br label %ret_res
+
+ret_res:
+ %r = phi i32 [ %mul5, %mulx ], [ %x, %entry ]
+ ret i32 %r
+}
+
+; INL-LABEL: @caller
+; NOINL-LABEL: @caller
+; INL: mul i32
+; INL-NOT: call i32
+; NOINL-NOT: mul i32
+; NOINL: call i32
+
+define amdgpu_kernel void @caller(i32 %x) {
+ %res = call i32 @callee(i32 %x)
+ store volatile i32 %res, i32 addrspace(1)* undef
+ ret void
+}
Index: lib/Target/AMDGPU/AMDGPUInline.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInline.cpp
+++ lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -49,6 +49,10 @@
ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
cl::desc("Maximum alloca size to use for inline cost"));
+static cl::opt<size_t>
+MaxBB("amd-inline-max-bb", cl::Hidden, cl::init(300),
+ cl::desc("Maximum BB number allowed in a function after inlining"));
+
namespace {
class AMDGPUInliner : public LegacyInlinerBase {
@@ -208,7 +212,15 @@
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
+ auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
LocalParams, TTI, GetAssumptionCache, None, PSI,
RemarksEnabled ? &ORE : nullptr);
+
+ if (IC && !IC.isAlways()) {
+ // Single BB does not increase total BB amount, thus subtract 1
+ size_t Size = Caller->size() + Callee->size() - 1;
+ if (MaxBB && Size > MaxBB)
+ return llvm::InlineCost::getNever("max number of bb exceeded");
+ }
+ return IC;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D62917.203187.patch
Type: text/x-patch
Size: 2251 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190605/a993fa28/attachment.bin>
More information about the llvm-commits
mailing list