<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Jun 29, 2016 at 2:51 PM, Rong Xu <span dir="ltr"><<a href="mailto:xur@google.com" target="_blank">xur@google.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">xur updated this revision to Diff 62287.<br>
xur added a comment.<br>
<br>
Here is the updated patch. Changes are:<br>
(1) Removed options preinline as suggested by mehdi.<br>
(2) Updated the cleanup passes list.<br>
<br>
I used Google internal benchmarks to measure the performance. The cleanup passes give a ~2% geo-mean improvement in profile-generate performance. The largest saving is 17%.<br></blockquote><div><br></div><div> </div><div>You may want to clarify it is the effect of those passes other than pre-inline.</div><div><br></div><div>David</div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
David's suggestion on using SROA and EarlyCSE is good. It has positive impacts on a number of programs (largest ~10%)<br>
<br>
JumpThreading also helps a few programs (~3%).<br>
<br>
In this patch, I propose to use<br>
SROA + EarlyCSE + CFGSimplification + InstructionCombining<br>
and remove the rest.<br>
<div><div class="h5"><br>
<br>
<a href="http://reviews.llvm.org/D21405" rel="noreferrer" target="_blank">http://reviews.llvm.org/D21405</a><br>
<br>
Files:<br>
lib/Transforms/IPO/PassManagerBuilder.cpp<br>
test/Transforms/PGOProfile/preinline.ll<br>
<br>
Index: test/Transforms/PGOProfile/preinline.ll<br>
===================================================================<br>
--- /dev/null<br>
+++ test/Transforms/PGOProfile/preinline.ll<br>
@@ -0,0 +1,20 @@<br>
+; RUN: opt < %s -O2 -profile-generate=default.profraw -S | FileCheck %s --check-prefix=GEN<br>
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"<br>
+target triple = "x86_64-unknown-linux-gnu"<br>
+<br>
+define i32 @foo(i32 %i) {<br>
+entry:<br>
+; GEN: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo<br>
+; GEN-NOT: %pgocount.i = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__bar<br>
+ %call = call i32 @bar()<br>
+ %add = add nsw i32 %i, %call<br>
+ ret i32 %add<br>
+}<br>
+<br>
+define internal i32 @bar() {<br>
+entry:<br>
+ %call = call i32 (...) @bar1()<br>
+ ret i32 %call<br>
+}<br>
+<br>
+declare i32 @bar1(...)<br>
Index: lib/Transforms/IPO/PassManagerBuilder.cpp<br>
===================================================================<br>
--- lib/Transforms/IPO/PassManagerBuilder.cpp<br>
+++ lib/Transforms/IPO/PassManagerBuilder.cpp<br>
</div></div>@@ -114,6 +114,10 @@<br>
<span class=""> "enable-loop-versioning-licm", cl::init(false), cl::Hidden,<br>
cl::desc("Enable the experimental Loop Versioning LICM pass"));<br>
<br>
+static cl::opt<bool><br>
</span><span class="">+ DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,<br>
+ cl::desc("Disable pre-instrumentation inliner"));<br>
+<br>
PassManagerBuilder::PassManagerBuilder() {<br>
OptLevel = 2;<br>
SizeLevel = 0;<br>
</span>@@ -202,8 +206,30 @@<br>
<span class=""> FPM.add(createLowerExpectIntrinsicPass());<br>
}<br>
<br>
+// Get the inline threshold for pre-instrumentation inline.<br>
+static int computePreInlineThresholdFromOptLevels(unsigned OptLevel,<br>
+ unsigned SizeOptLevel) {<br>
+ if (SizeOptLevel == 1) // -Os<br>
+ return 75;<br>
+ if (SizeOptLevel == 2) // -Oz<br>
+ return 25;<br>
+ return 75;<br>
+}<br>
+<br>
// Do PGO instrumentation generation or use pass as the option specified.<br>
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {<br>
+ if (PGOInstrGen.empty() && PGOInstrUse.empty())<br>
+ return;<br>
</span>+ if (OptLevel > 0 && !DisablePreInliner) {<br>
<span class="">+ // Create preinline pass.<br>
+ MPM.add(createFunctionInliningPass(<br>
+ computePreInlineThresholdFromOptLevels(OptLevel, SizeLevel)));<br>
</span>+ MPM.add(createSROAPass());<br>
<span class="im HOEnZb">+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies<br>
</span><span class="im HOEnZb">+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs<br>
+ MPM.add(createInstructionCombiningPass()); // Combine silly seq's<br>
</span><div class="HOEnZb"><div class="h5">+ addExtensionsToPM(EP_Peephole, MPM);<br>
+ }<br>
if (!PGOInstrGen.empty()) {<br>
MPM.add(createPGOInstrumentationGenLegacyPass());<br>
// Add the profile lowering pass.<br>
<br>
<br>
</div></div></blockquote></div><br></div></div>