[llvm] [MachineSink] Sink into consistent blocks for optsize funcs (PR #115367)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 7 12:00:02 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Ellis Hoag (ellishg)

<details>
<summary>Changes</summary>

Do not consider profile data when choosing a successor block to sink into for optsize functions. This should result in more consistent instruction sequences which will improve outlining and ICF. We've observed a slight codesize improvement in a large binary. This is similar reasoning to https://github.com/llvm/llvm-project/pull/114607.

Using profile data to select a block to sink into was original added in https://github.com/llvm/llvm-project/commit/d04f7596e79d7c5cf7e4249ad62690afaecd01ec.

---
Full diff: https://github.com/llvm/llvm-project/pull/115367.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/MachineSink.cpp (+10-4) 
- (modified) llvm/test/CodeGen/X86/sink-blockfreq.ll (+24-7) 


``````````diff
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 658ebd47488c71..3293f7c982749d 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -38,6 +39,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -122,6 +124,7 @@ namespace {
     MachineDominatorTree *DT = nullptr;      // Machine dominator tree
     MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
     MachineCycleInfo *CI = nullptr;
+    ProfileSummaryInfo *PSI = nullptr;
     MachineBlockFrequencyInfo *MBFI = nullptr;
     const MachineBranchProbabilityInfo *MBPI = nullptr;
     AliasAnalysis *AA = nullptr;
@@ -198,6 +201,7 @@ namespace {
       AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
       AU.addPreserved<MachineCycleInfoWrapperPass>();
       AU.addPreserved<MachineLoopInfoWrapperPass>();
+      AU.addRequired<ProfileSummaryInfoWrapperPass>();
       if (UseBlockFreqInfo)
         AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
       AU.addRequired<TargetPassConfig>();
@@ -284,6 +288,7 @@ char &llvm::MachineSinkingID = MachineSinking::ID;
 
 INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                       "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
@@ -722,6 +727,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
   DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
   PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
   CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   MBFI = UseBlockFreqInfo
              ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
              : nullptr;
@@ -1217,12 +1223,12 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
 
   // Sort Successors according to their cycle depth or block frequency info.
   llvm::stable_sort(
-      AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+      AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
         uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
         uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
-        bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
-        return HasBlockFreq ? LHSFreq < RHSFreq
-                            : CI->getCycleDepth(L) < CI->getCycleDepth(R);
+        if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) || !LHSFreq || !RHSFreq)
+          return CI->getCycleDepth(L) < CI->getCycleDepth(R);
+        return LHSFreq < RHSFreq;
       });
 
   auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
diff --git a/llvm/test/CodeGen/X86/sink-blockfreq.ll b/llvm/test/CodeGen/X86/sink-blockfreq.ll
index cad9cf81905cd4..c2653a86f53aff 100644
--- a/llvm/test/CodeGen/X86/sink-blockfreq.ll
+++ b/llvm/test/CodeGen/X86/sink-blockfreq.ll
@@ -1,12 +1,13 @@
 ; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
 ; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -force-pgso -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
 
 ; Test that by changing BlockFrequencyInfo we change the order in which
 ; machine-sink looks for successor blocks. By not using BFI, both G and B
 ; have the same loop depth and no instructions is sinked - B is selected but
 ; can't be used as to avoid breaking a non profitable critical edge. By using
 ; BFI, "mul" is sinked into the less frequent block G.
-define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
+define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp !prof !14 {
 ; MSINK_BFI-LABEL: sink_freqinfo
 ; MSINK_BFI: jl
 ; MSINK_BFI-NEXT: ## %bb.
@@ -22,24 +23,40 @@ B:
   %ee = phi i32 [ 0, %entry ], [ %inc, %F ]
   %xx = sub i32 %a, %ee
   %cond0 = icmp slt i32 %xx, 0
-  br i1 %cond0, label %F, label %exit, !prof !0
+  br i1 %cond0, label %F, label %exit, !prof !15
 
 F:
   %inc = add nsw i32 %xx, 2
   %aa = mul nsw i32 %b, %inc
   %exitcond = icmp slt i32 %inc, %a
-  br i1 %exitcond, label %B, label %G, !prof !1
+  br i1 %exitcond, label %B, label %G, !prof !16
 
 G:
   %ii = add nsw i32 %aa, %a
   %ll = add i32 %b, 45
   %exitcond2 = icmp sge i32 %ii, %b
-  br i1 %exitcond2, label %G, label %exit, !prof !2
+  br i1 %exitcond2, label %G, label %exit, !prof !17
 
 exit:
   ret i32 0
 }
 
-!0 = !{!"branch_weights", i32 4, i32 1}
-!1 = !{!"branch_weights", i32 128, i32 1}
-!2 = !{!"branch_weights", i32 1, i32 1}
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 1000}
+!15 = !{!"branch_weights", i32 4, i32 1}
+!16 = !{!"branch_weights", i32 128, i32 1}
+!17 = !{!"branch_weights", i32 1, i32 1}

``````````

</details>


https://github.com/llvm/llvm-project/pull/115367


More information about the llvm-commits mailing list