[llvm] [MachineSink] Sink into consistent blocks for optsize funcs (PR #115367)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 12:00:02 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Ellis Hoag (ellishg)
<details>
<summary>Changes</summary>
Do not consider profile data when choosing a successor block to sink into for optsize functions. This should result in more consistent instruction sequences which will improve outlining and ICF. We've observed a slight codesize improvement in a large binary. This is similar reasoning to https://github.com/llvm/llvm-project/pull/114607.
Using profile data to select a block to sink into was original added in https://github.com/llvm/llvm-project/commit/d04f7596e79d7c5cf7e4249ad62690afaecd01ec.
---
Full diff: https://github.com/llvm/llvm-project/pull/115367.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/MachineSink.cpp (+10-4)
- (modified) llvm/test/CodeGen/X86/sink-blockfreq.ll (+24-7)
``````````diff
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 658ebd47488c71..3293f7c982749d 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -38,6 +39,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -122,6 +124,7 @@ namespace {
MachineDominatorTree *DT = nullptr; // Machine dominator tree
MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
MachineCycleInfo *CI = nullptr;
+ ProfileSummaryInfo *PSI = nullptr;
MachineBlockFrequencyInfo *MBFI = nullptr;
const MachineBranchProbabilityInfo *MBPI = nullptr;
AliasAnalysis *AA = nullptr;
@@ -198,6 +201,7 @@ namespace {
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addPreserved<MachineCycleInfoWrapperPass>();
AU.addPreserved<MachineLoopInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
@@ -284,6 +288,7 @@ char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
@@ -722,6 +727,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MBFI = UseBlockFreqInfo
? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
: nullptr;
@@ -1217,12 +1223,12 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
// Sort Successors according to their cycle depth or block frequency info.
llvm::stable_sort(
- AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
- bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
- return HasBlockFreq ? LHSFreq < RHSFreq
- : CI->getCycleDepth(L) < CI->getCycleDepth(R);
+ if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) || !LHSFreq || !RHSFreq)
+ return CI->getCycleDepth(L) < CI->getCycleDepth(R);
+ return LHSFreq < RHSFreq;
});
auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
diff --git a/llvm/test/CodeGen/X86/sink-blockfreq.ll b/llvm/test/CodeGen/X86/sink-blockfreq.ll
index cad9cf81905cd4..c2653a86f53aff 100644
--- a/llvm/test/CodeGen/X86/sink-blockfreq.ll
+++ b/llvm/test/CodeGen/X86/sink-blockfreq.ll
@@ -1,12 +1,13 @@
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -force-pgso -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
; Test that by changing BlockFrequencyInfo we change the order in which
; machine-sink looks for successor blocks. By not using BFI, both G and B
; have the same loop depth and no instructions is sinked - B is selected but
; can't be used as to avoid breaking a non profitable critical edge. By using
; BFI, "mul" is sinked into the less frequent block G.
-define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
+define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp !prof !14 {
; MSINK_BFI-LABEL: sink_freqinfo
; MSINK_BFI: jl
; MSINK_BFI-NEXT: ## %bb.
@@ -22,24 +23,40 @@ B:
%ee = phi i32 [ 0, %entry ], [ %inc, %F ]
%xx = sub i32 %a, %ee
%cond0 = icmp slt i32 %xx, 0
- br i1 %cond0, label %F, label %exit, !prof !0
+ br i1 %cond0, label %F, label %exit, !prof !15
F:
%inc = add nsw i32 %xx, 2
%aa = mul nsw i32 %b, %inc
%exitcond = icmp slt i32 %inc, %a
- br i1 %exitcond, label %B, label %G, !prof !1
+ br i1 %exitcond, label %B, label %G, !prof !16
G:
%ii = add nsw i32 %aa, %a
%ll = add i32 %b, 45
%exitcond2 = icmp sge i32 %ii, %b
- br i1 %exitcond2, label %G, label %exit, !prof !2
+ br i1 %exitcond2, label %G, label %exit, !prof !17
exit:
ret i32 0
}
-!0 = !{!"branch_weights", i32 4, i32 1}
-!1 = !{!"branch_weights", i32 128, i32 1}
-!2 = !{!"branch_weights", i32 1, i32 1}
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 1000}
+!15 = !{!"branch_weights", i32 4, i32 1}
+!16 = !{!"branch_weights", i32 128, i32 1}
+!17 = !{!"branch_weights", i32 1, i32 1}
``````````
</details>
https://github.com/llvm/llvm-project/pull/115367
More information about the llvm-commits
mailing list