[llvm] r318179 - [CodeGenPrepare] Disable div bypass when working set size is huge.

Easwaran Raman via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 14 11:31:51 PST 2017


Author: eraman
Date: Tue Nov 14 11:31:51 2017
New Revision: 318179

URL: http://llvm.org/viewvc/llvm-project?rev=318179&view=rev
Log:
[CodeGenPrepare] Disable div bypass when working set size is huge.

Summary:
Bypass of slow divs based on operand values is currently disabled for
-Os. Do the same when profile summary is available and the working set
size of the application is huge. This is similar to how loop peeling is
guarded by hasHugeWorkingSetSize. In the div bypass case, the generated
extra code (and the extra branch) tends to outweigh the benefits of the
bypass. This results in noticeable performance improvement on an
internal application.

Reviewers: davidxl

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39992

Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=318179&r1=318178&r2=318179&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Tue Nov 14 11:31:51 2017
@@ -353,9 +353,9 @@ bool CodeGenPrepare::runOnFunction(Funct
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   OptSize = F.optForSize();
 
+  ProfileSummaryInfo *PSI =
+      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   if (ProfileGuidedSectionPrefix) {
-    ProfileSummaryInfo *PSI =
-        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
     if (PSI->isFunctionHotInCallGraph(&F))
       F.setSectionPrefix(".hot");
     else if (PSI->isFunctionColdInCallGraph(&F))
@@ -364,7 +364,8 @@ bool CodeGenPrepare::runOnFunction(Funct
 
   /// This optimization identifies DIV instructions that can be
   /// profitably bypassed and carried out with a shorter, faster divide.
-  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+  if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
+      TLI->isSlowDivBypassed()) {
     const DenseMap<unsigned int, unsigned int> &BypassWidths =
        TLI->getBypassSlowDivWidths();
     BasicBlock* BB = &*F.begin();

Modified: llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll?rev=318179&r1=318178&r2=318179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll Tue Nov 14 11:31:51 2017
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom       < %s | FileCheck -check-prefixes=ATOM,CHECK %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=REST,CHECK %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=REST,CHECK %s
+; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=HUGEWS %s
 
 ; Verify that div32 is bypassed only for Atoms.
 define i32 @div32(i32 %a, i32 %b) {
@@ -36,6 +37,15 @@ entry:
 define i64 @div64_optsize(i64 %a, i64 %b) optsize {
 ; CHECK-LABEL: div64_optsize:
 ; CHECK-NOT: divl
+; CHECK: ret
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
+
+define i64 @div64_hugews(i64 %a, i64 %b) {
+; HUGEWS-LABEL: div64_hugews:
+; HUGEWS-NOT: divl
+; HUGEWS: ret
   %div = sdiv i64 %a, %b
   ret i64 %div
 }
@@ -43,6 +53,7 @@ define i64 @div64_optsize(i64 %a, i64 %b
 define i32 @div32_optsize(i32 %a, i32 %b) optsize {
 ; CHECK-LABEL: div32_optsize:
 ; CHECK-NOT: divb
+; CHECK: ret
   %div = sdiv i32 %a, %b
   ret i32 %div
 }
@@ -50,6 +61,23 @@ define i32 @div32_optsize(i32 %a, i32 %b
 define i32 @div32_minsize(i32 %a, i32 %b) minsize {
 ; CHECK-LABEL: div32_minsize:
 ; CHECK-NOT: divb
+; CHECK: ret
   %div = sdiv i32 %a, %b
   ret i32 %div
 }
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 3}
+!14 = !{i32 999999, i64 5, i32 3}




More information about the llvm-commits mailing list