[llvm] 57c33ac - [MachineSink] Sink into consistent blocks for optsize funcs (#115367)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 12 09:53:30 PST 2024
Author: Ellis Hoag
Date: 2024-11-12T09:53:27-08:00
New Revision: 57c33acac8c74eb071ede35d819918d8bd00e45b
URL: https://github.com/llvm/llvm-project/commit/57c33acac8c74eb071ede35d819918d8bd00e45b
DIFF: https://github.com/llvm/llvm-project/commit/57c33acac8c74eb071ede35d819918d8bd00e45b.diff
LOG: [MachineSink] Sink into consistent blocks for optsize funcs (#115367)
Do not consider profile data when choosing a successor block to sink
into for optsize functions. This should result in more consistent
instruction sequences which will improve outlining and ICF. We've
observed a slight codesize improvement in a large binary. This is
similar reasoning to https://github.com/llvm/llvm-project/pull/114607.
Using profile data to select a block to sink into was original added in
https://github.com/llvm/llvm-project/commit/d04f7596e79d7c5cf7e4249ad62690afaecd01ec.
Added:
Modified:
llvm/lib/CodeGen/MachineSink.cpp
llvm/test/CodeGen/X86/sink-blockfreq.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 658ebd47488c71..3293f7c982749d 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -38,6 +39,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -122,6 +124,7 @@ namespace {
MachineDominatorTree *DT = nullptr; // Machine dominator tree
MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
MachineCycleInfo *CI = nullptr;
+ ProfileSummaryInfo *PSI = nullptr;
MachineBlockFrequencyInfo *MBFI = nullptr;
const MachineBranchProbabilityInfo *MBPI = nullptr;
AliasAnalysis *AA = nullptr;
@@ -198,6 +201,7 @@ namespace {
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addPreserved<MachineCycleInfoWrapperPass>();
AU.addPreserved<MachineLoopInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
@@ -284,6 +288,7 @@ char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
@@ -722,6 +727,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MBFI = UseBlockFreqInfo
? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
: nullptr;
@@ -1217,12 +1223,12 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
// Sort Successors according to their cycle depth or block frequency info.
llvm::stable_sort(
- AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
- bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
- return HasBlockFreq ? LHSFreq < RHSFreq
- : CI->getCycleDepth(L) < CI->getCycleDepth(R);
+ if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) || !LHSFreq || !RHSFreq)
+ return CI->getCycleDepth(L) < CI->getCycleDepth(R);
+ return LHSFreq < RHSFreq;
});
auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
diff --git a/llvm/test/CodeGen/X86/sink-blockfreq.ll b/llvm/test/CodeGen/X86/sink-blockfreq.ll
index cad9cf81905cd4..c2653a86f53aff 100644
--- a/llvm/test/CodeGen/X86/sink-blockfreq.ll
+++ b/llvm/test/CodeGen/X86/sink-blockfreq.ll
@@ -1,12 +1,13 @@
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -force-pgso -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
; Test that by changing BlockFrequencyInfo we change the order in which
; machine-sink looks for successor blocks. By not using BFI, both G and B
; have the same loop depth and no instructions is sinked - B is selected but
; can't be used as to avoid breaking a non profitable critical edge. By using
; BFI, "mul" is sinked into the less frequent block G.
-define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
+define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp !prof !14 {
; MSINK_BFI-LABEL: sink_freqinfo
; MSINK_BFI: jl
; MSINK_BFI-NEXT: ## %bb.
@@ -22,24 +23,40 @@ B:
%ee = phi i32 [ 0, %entry ], [ %inc, %F ]
%xx = sub i32 %a, %ee
%cond0 = icmp slt i32 %xx, 0
- br i1 %cond0, label %F, label %exit, !prof !0
+ br i1 %cond0, label %F, label %exit, !prof !15
F:
%inc = add nsw i32 %xx, 2
%aa = mul nsw i32 %b, %inc
%exitcond = icmp slt i32 %inc, %a
- br i1 %exitcond, label %B, label %G, !prof !1
+ br i1 %exitcond, label %B, label %G, !prof !16
G:
%ii = add nsw i32 %aa, %a
%ll = add i32 %b, 45
%exitcond2 = icmp sge i32 %ii, %b
- br i1 %exitcond2, label %G, label %exit, !prof !2
+ br i1 %exitcond2, label %G, label %exit, !prof !17
exit:
ret i32 0
}
-!0 = !{!"branch_weights", i32 4, i32 1}
-!1 = !{!"branch_weights", i32 128, i32 1}
-!2 = !{!"branch_weights", i32 1, i32 1}
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 1000}
+!15 = !{!"branch_weights", i32 4, i32 1}
+!16 = !{!"branch_weights", i32 128, i32 1}
+!17 = !{!"branch_weights", i32 1, i32 1}
More information about the llvm-commits
mailing list