[llvm] edab7dd - Disable hoisting MI to hotter basic blocks
Victor Huang via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 11 13:36:22 PST 2019
Author: Victor Huang
Date: 2019-11-11T21:32:56Z
New Revision: edab7dd426249bd40059b49b255ba9cc5b784753
URL: https://github.com/llvm/llvm-project/commit/edab7dd426249bd40059b49b255ba9cc5b784753
DIFF: https://github.com/llvm/llvm-project/commit/edab7dd426249bd40059b49b255ba9cc5b784753.diff
LOG: Disable hoisting MI to hotter basic blocks
The current Hoist() function of the machine LICM pass does not check the frequencies of the source and destination basic blocks that an instruction is hoisted from/to.
As a result, an instruction may be hoisted from a cold basic block to a hot one.
This patch adds options to disable machine instruction hoisting if the destination block is hotter.
Differential Revision: https://reviews.llvm.org/D63676
Added:
llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir
llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir
Modified:
llvm/lib/CodeGen/MachineLICM.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 6a898ff6ef88..194125feea0d 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -74,6 +75,27 @@ static cl::opt<bool>
HoistConstStores("hoist-const-stores",
cl::desc("Hoist invariant stores"),
cl::init(true), cl::Hidden);
+// Hoisting is rejected when the target (preheader) block frequency divided by
+// the source block frequency is greater than this value.
+// The default threshold of 100 (i.e. if target block is 100 times hotter)
+// is based on empirical data on a single target and is subject to tuning.
+static cl::opt<unsigned>
+BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
+                             cl::desc("Do not hoist instructions if target"
+                                      " block is N times hotter than the source."),
+                             cl::init(100), cl::Hidden);
+
+// Selects when block frequency information is consulted before hoisting:
+//   None - never; the hotness check is disabled.
+//   PGO  - only when the function has profile data.
+//   All  - always, with or without profile data.
+enum class UseBFI { None, PGO, All };
+
+// Controls whether Hoist() refuses to move an instruction into a preheader
+// that is hotter than the block the instruction currently lives in.
+// The check is off by default (UseBFI::None).
+static cl::opt<UseBFI>
+DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
+ cl::desc("Disable hoisting instructions to"
+ " hotter blocks"),
+ cl::init(UseBFI::None), cl::Hidden,
+ cl::values(clEnumValN(UseBFI::None, "none",
+ "disable the feature"),
+ clEnumValN(UseBFI::PGO, "pgo",
+ "enable the feature when using profile data"),
+ clEnumValN(UseBFI::All, "all",
+ "enable the feature with/wo profile data")));
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
@@ -87,6 +109,8 @@ STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
STATISTIC(NumStoreConst,
"Number of stores of const phys reg hoisted out of loops");
+STATISTIC(NumNotHoistedDueToHotness,
+ "Number of instructions not hoisted due to block frequency");
namespace {
@@ -98,9 +122,11 @@ namespace {
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
bool PreRegAlloc;
+ bool HasProfileData;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
+ MachineBlockFrequencyInfo *MBFI; // Machine block frequency info
MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineDominatorTree *DT; // Machine dominator tree for the cur loop
@@ -150,6 +176,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
+ if (DisableHoistingToHotterBlocks != UseBFI::None)
+ AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
@@ -245,6 +273,8 @@ namespace {
void InitCSEMap(MachineBasicBlock *BB);
+ bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+ MachineBasicBlock *TgtBlock);
MachineBasicBlock *getCurPreheader();
};
@@ -275,6 +305,7 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
@@ -283,6 +314,7 @@ INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
"Early Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
@@ -315,6 +347,7 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
SchedModel.init(&ST);
PreRegAlloc = MRI->isSSA();
+ HasProfileData = MF.getFunction().hasProfileData();
if (PreRegAlloc)
LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
@@ -333,6 +366,8 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
}
// Get our Loop information...
+ if (DisableHoistingToHotterBlocks != UseBFI::None)
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -1433,6 +1468,15 @@ bool MachineLICMBase::MayCSE(MachineInstr *MI) {
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ MachineBasicBlock *SrcBlock = MI->getParent();
+
+ // Disable the instruction hoisting due to block hotness
+ if ((DisableHoistingToHotterBlocks == UseBFI::All ||
+ (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
+ isTgtHotterThanSrc(SrcBlock, Preheader)) {
+ ++NumNotHoistedDueToHotness;
+ return false;
+ }
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
@@ -1526,3 +1570,21 @@ MachineBasicBlock *MachineLICMBase::getCurPreheader() {
}
return CurPreheader;
}
+
+/// Decide whether hoisting from \p SrcBlock into \p TgtBlock must be rejected
+/// because of block hotness. Returns true when the source block frequency is
+/// zero, or when the target/source frequency ratio is strictly greater than
+/// "BlockFrequencyRatioThreshold"; returns false otherwise.
+bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+                                         MachineBasicBlock *TgtBlock) {
+  // Look up the raw frequencies of both blocks in MBFI.
+  const uint64_t SrcFreq = MBFI->getBlockFreq(SrcBlock).getFrequency();
+  const uint64_t TgtFreq = MBFI->getBlockFreq(TgtBlock).getFrequency();
+
+  // With a zero source frequency no ratio can be formed; report the target
+  // as hotter so that the caller does not hoist.
+  if (SrcFreq == 0)
+    return true;
+
+  // Form the ratio in floating point so that fractional ratios are kept,
+  // then test it against the user-tunable threshold.
+  const double HotnessRatio = static_cast<double>(TgtFreq) / SrcFreq;
+  return HotnessRatio > BlockFrequencyRatioThreshold;
+}
diff --git a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir
new file mode 100644
index 000000000000..84afa3f09e1e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir
@@ -0,0 +1,190 @@
+# NOTE: This test verifies disable/enable instruction hoisting to hot blocks based on non-profile data
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=all -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-NO-HOIST
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=all -block-freq-ratio-threshold=100000000 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=pgo -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=none -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+
+--- |
+ target datalayout = "e-m:e-i64:64-n32:64"
+
+ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 signext %Len, i32* nocapture %Ptr) {
+ entry:
+ tail call void asm sideeffect "#NOTHING", "~{r2}"()
+ %cmp6 = icmp sgt i32 %Len, 0
+ br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
+
+ for.body.lr.ph: ; preds = %entry
+ %cmp1 = icmp sgt i32 %Arg, 10
+ br label %for.body
+
+ for.cond.cleanup: ; preds = %for.inc, %entry
+ ret void
+
+ for.body: ; preds = %for.inc, %for.body.lr.ph
+ %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ %0 = load i32, i32* %Ptr, align 4
+ %1 = add i32 %i.07, %0
+ store i32 %1, i32* %Ptr, align 4
+ br i1 %cmp1, label %if.then, label %for.inc
+
+ if.then: ; preds = %for.body
+ tail call void asm sideeffect "#NOTHING", "~{r2}"()
+ tail call void %fp(i32 signext %Arg)
+ br label %for.inc
+
+ for.inc: ; preds = %if.then, %for.body
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %Len, %inc
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+ }
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #0
+
+ attributes #0 = { nounwind }
+
+...
+---
+name: test
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+ - { id: 0, class: crbitrc, preferred-register: '' }
+ - { id: 1, class: gprc_and_gprc_nor0, preferred-register: '' }
+ - { id: 2, class: gprc, preferred-register: '' }
+ - { id: 3, class: g8rc, preferred-register: '' }
+ - { id: 4, class: g8rc, preferred-register: '' }
+ - { id: 5, class: g8rc, preferred-register: '' }
+ - { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 7, class: gprc, preferred-register: '' }
+ - { id: 8, class: gprc, preferred-register: '' }
+ - { id: 9, class: crrc, preferred-register: '' }
+ - { id: 10, class: gprc, preferred-register: '' }
+ - { id: 11, class: crrc, preferred-register: '' }
+ - { id: 12, class: gprc, preferred-register: '' }
+ - { id: 13, class: gprc, preferred-register: '' }
+ - { id: 14, class: g8rc, preferred-register: '' }
+ - { id: 15, class: g8rc, preferred-register: '' }
+ - { id: 16, class: crrc, preferred-register: '' }
+liveins:
+ - { reg: '$x3', virtual-reg: '%3' }
+ - { reg: '$x4', virtual-reg: '%4' }
+ - { reg: '$x5', virtual-reg: '%5' }
+ - { reg: '$x6', virtual-reg: '%6' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x7ecade30), %bb.2(0x013521d0)
+ liveins: $x3, $x4, $x5, $x6
+
+ %6:g8rc_and_g8rc_nox0 = COPY $x6
+ %5:g8rc = COPY $x5
+ %4:g8rc = COPY $x4
+ %3:g8rc = COPY $x3
+ %7:gprc = COPY %4.sub_32
+ %8:gprc = COPY %5.sub_32
+ INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2
+ %9:crrc = CMPWI %8, 1
+ BCC 12, killed %9, %bb.2
+ B %bb.1
+
+ bb.1.for.body.lr.ph:
+ successors: %bb.3(0x80000000)
+
+ %11:crrc = CMPWI %7, 10
+ %0:crbitrc = COPY %11.sub_gt
+ %10:gprc = LI 0
+ B %bb.3
+
+ bb.2.for.cond.cleanup:
+ BLR8 implicit $lr8, implicit $rm
+
+ bb.3.for.body:
+ successors: %bb.4(0x00000002), %bb.5(0x7ffffffe)
+
+ %1:gprc_and_gprc_nor0 = PHI %10, %bb.1, %2, %bb.5
+ %12:gprc = LWZ 0, %6 :: (load 4 from %ir.Ptr)
+ %13:gprc = ADD4 %1, killed %12
+ STW killed %13, 0, %6 :: (store 4 into %ir.Ptr)
+ BCn %0, %bb.5
+ B %bb.4
+
+ bb.4.if.then:
+ successors: %bb.5(0x80000000)
+
+ INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2
+ ADJCALLSTACKDOWN 32, 0, implicit-def dead $r1, implicit $r1
+ %14:g8rc = COPY $x2
+ STD %14, 24, $x1 :: (store 8 into stack + 24)
+ %15:g8rc = EXTSW_32_64 %7
+ $x3 = COPY %15
+ $x12 = COPY %3
+ MTCTR8 %3, implicit-def $ctr8
+ BCTRL8_LDinto_toc 24, $x1, csr_svr464_altivec, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x3, implicit $x12, implicit $x2, implicit-def $r1
+ ADJCALLSTACKUP 32, 0, implicit-def dead $r1, implicit $r1
+
+ bb.5.for.inc:
+ successors: %bb.2(0x013521d0), %bb.3(0x7ecade30)
+
+ %2:gprc = nuw nsw ADDI %1, 1
+ %16:crrc = CMPLW %8, %2
+ BCC 76, killed %16, %bb.2
+ B %bb.3
+
+...
+
+# CHECK for enabling instruction hoisting
+#CHECK-LABEL: test
+#CHECK-HOIST: bb.1.for.body.lr.ph:
+#CHECK-HOIST: %14:g8rc = COPY $x2
+#CHECK-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-HOIST: %15:g8rc = EXTSW_32_64 %7
+#CHECK-HOIST: B %bb.3
+
+#CHECK-HOIST: bb.4.if.then:
+#CHECK-HOIST-NOT: %14:g8rc = COPY $x2
+#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
+#CHECK-HOIST: bb.5.for.inc:
+
+# CHECK for disabling instruction hoisting due to block hotness
+#CHECK-LABEL: test
+#CHECK-NO-HOIST: bb.1.for.body.lr.ph:
+#CHECK-NO-HOIST-NOT: %14:g8rc = COPY $x2
+#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-NO-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
+#CHECK-NO-HOIST: B %bb.3
+
+#CHECK-NO-HOIST: bb.4.if.then:
+#CHECK-NO-HOIST: %14:g8rc = COPY $x2
+#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-NO-HOIST: %15:g8rc = EXTSW_32_64 %7
+#CHECK-NO-HOIST: bb.5.for.inc:
diff --git a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir
new file mode 100644
index 000000000000..a9b65574066e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir
@@ -0,0 +1,237 @@
+# NOTE: This test verifies disable/enable instruction hoisting to hot blocks based on profile data
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=pgo -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-NO-HOIST
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=pgo -block-freq-ratio-threshold=100000000 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=none -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+
+--- |
+ target datalayout = "e-m:e-i64:64-n32:64"
+ target triple = "powerpc64le-unknown-linux-gnu"
+
+ ; Function Attrs: nounwind
+ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 signext %Len, i32* nocapture %Ptr) local_unnamed_addr #0 !prof !29 !section_prefix !30 {
+ entry:
+ tail call void asm sideeffect "#NOTHING", "~{r2}"() #1, !srcloc !31
+ %cmp6 = icmp sgt i32 %Len, 0
+ br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup, !prof !32
+
+ for.body.lr.ph: ; preds = %entry
+ %cmp1 = icmp sgt i32 %Arg, 10
+ br label %for.body
+
+ for.cond.cleanup: ; preds = %for.inc, %entry
+ ret void
+
+ for.body: ; preds = %for.inc, %for.body.lr.ph
+ %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ %0 = load i32, i32* %Ptr, align 4, !tbaa !33
+ %1 = add i32 %i.07, %0
+ store i32 %1, i32* %Ptr, align 4, !tbaa !33
+ br i1 %cmp1, label %if.then, label %for.inc, !prof !37
+
+ if.then: ; preds = %for.body
+ tail call void asm sideeffect "#NOTHING", "~{r2}"() #1, !srcloc !31
+ tail call void %fp(i32 signext %Arg) #1, !prof !38
+ br label %for.inc
+
+ for.inc: ; preds = %if.then, %for.body
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %Len, %inc
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !39
+ }
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { nounwind }
+
+ !llvm.module.flags = !{!0, !1}
+ !llvm.ident = !{!28}
+
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 1, !"ProfileSummary", !2}
+ !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+ !3 = !{!"ProfileFormat", !"InstrProf"}
+ !4 = !{!"TotalCount", i64 25405000087}
+ !5 = !{!"MaxCount", i64 21000000020}
+ !6 = !{!"MaxInternalCount", i64 200000003}
+ !7 = !{!"MaxFunctionCount", i64 21000000020}
+ !8 = !{!"NumCounts", i64 15}
+ !9 = !{!"NumFunctions", i64 7}
+ !10 = !{!"DetailedSummary", !11}
+ !11 = !{!12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27}
+ !12 = !{i32 10000, i64 21000000020, i32 1}
+ !13 = !{i32 100000, i64 21000000020, i32 1}
+ !14 = !{i32 200000, i64 21000000020, i32 1}
+ !15 = !{i32 300000, i64 21000000020, i32 1}
+ !16 = !{i32 400000, i64 21000000020, i32 1}
+ !17 = !{i32 500000, i64 21000000020, i32 1}
+ !18 = !{i32 600000, i64 21000000020, i32 1}
+ !19 = !{i32 700000, i64 21000000020, i32 1}
+ !20 = !{i32 800000, i64 21000000020, i32 1}
+ !21 = !{i32 900000, i64 4203000000, i32 2}
+ !22 = !{i32 950000, i64 4203000000, i32 2}
+ !23 = !{i32 990000, i64 4203000000, i32 2}
+ !24 = !{i32 999000, i64 200000003, i32 3}
+ !25 = !{i32 999900, i64 200000003, i32 3}
+ !26 = !{i32 999990, i64 2000000, i32 4}
+ !27 = !{i32 999999, i64 2000000, i32 4}
+ !28 = !{!"clang version 9.0.0 (git at github.ibm.com:compiler/llvm-project.git 01fc2fc8e690ee427cab149cb0bfd63568bed89b)"}
+ !29 = !{!"function_entry_count", i64 200000003}
+ !30 = !{!"function_section_prefix", !".hot"}
+ !31 = !{i32 65}
+ !32 = !{!"branch_weights", i32 -94967292, i32 40000000}
+ !33 = !{!34, !34, i64 0}
+ !34 = !{!"int", !35, i64 0}
+ !35 = !{!"omnipotent char", !36, i64 0}
+ !36 = !{!"Simple C/C++ TBAA"}
+ !37 = !{!"branch_weights", i32 4, i32 -94967296}
+ !38 = !{!"VP", i32 0, i64 20, i64 -3706093650706652785, i64 20}
+ !39 = !{!"branch_weights", i32 40000000, i32 -94967292}
+
+...
+---
+name: test
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+ - { id: 0, class: crbitrc, preferred-register: '' }
+ - { id: 1, class: gprc_and_gprc_nor0, preferred-register: '' }
+ - { id: 2, class: gprc, preferred-register: '' }
+ - { id: 3, class: g8rc, preferred-register: '' }
+ - { id: 4, class: g8rc, preferred-register: '' }
+ - { id: 5, class: g8rc, preferred-register: '' }
+ - { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 7, class: gprc, preferred-register: '' }
+ - { id: 8, class: gprc, preferred-register: '' }
+ - { id: 9, class: crrc, preferred-register: '' }
+ - { id: 10, class: gprc, preferred-register: '' }
+ - { id: 11, class: crrc, preferred-register: '' }
+ - { id: 12, class: gprc, preferred-register: '' }
+ - { id: 13, class: gprc, preferred-register: '' }
+ - { id: 14, class: g8rc, preferred-register: '' }
+ - { id: 15, class: g8rc, preferred-register: '' }
+ - { id: 16, class: crrc, preferred-register: '' }
+liveins:
+ - { reg: '$x3', virtual-reg: '%3' }
+ - { reg: '$x4', virtual-reg: '%4' }
+ - { reg: '$x5', virtual-reg: '%5' }
+ - { reg: '$x6', virtual-reg: '%6' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x7ecade30), %bb.2(0x013521d0)
+ liveins: $x3, $x4, $x5, $x6
+
+ %6:g8rc_and_g8rc_nox0 = COPY $x6
+ %5:g8rc = COPY $x5
+ %4:g8rc = COPY $x4
+ %3:g8rc = COPY $x3
+ %7:gprc = COPY %4.sub_32
+ %8:gprc = COPY %5.sub_32
+ INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2, !31
+ %9:crrc = CMPWI %8, 1
+ BCC 12, killed %9, %bb.2
+ B %bb.1
+
+ bb.1.for.body.lr.ph:
+ successors: %bb.3(0x80000000)
+
+ %11:crrc = CMPWI %7, 10
+ %0:crbitrc = COPY %11.sub_gt
+ %10:gprc = LI 0
+ B %bb.3
+
+ bb.2.for.cond.cleanup:
+ BLR8 implicit $lr8, implicit $rm
+
+ bb.3.for.body:
+ successors: %bb.4(0x00000002), %bb.5(0x7ffffffe)
+
+ %1:gprc_and_gprc_nor0 = PHI %10, %bb.1, %2, %bb.5
+ %12:gprc = LWZ 0, %6 :: (load 4 from %ir.Ptr, !tbaa !33)
+ %13:gprc = ADD4 %1, killed %12
+ STW killed %13, 0, %6 :: (store 4 into %ir.Ptr, !tbaa !33)
+ BCn %0, %bb.5
+ B %bb.4
+
+ bb.4.if.then:
+ successors: %bb.5(0x80000000)
+
+ INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2, !31
+ ADJCALLSTACKDOWN 32, 0, implicit-def dead $r1, implicit $r1
+ %14:g8rc = COPY $x2
+ STD %14, 24, $x1 :: (store 8 into stack + 24)
+ %15:g8rc = EXTSW_32_64 %7
+ $x3 = COPY %15
+ $x12 = COPY %3
+ MTCTR8 %3, implicit-def $ctr8
+ BCTRL8_LDinto_toc 24, $x1, csr_svr464_altivec, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x3, implicit $x12, implicit $x2, implicit-def $r1
+ ADJCALLSTACKUP 32, 0, implicit-def dead $r1, implicit $r1
+
+ bb.5.for.inc:
+ successors: %bb.2(0x013521d0), %bb.3(0x7ecade30)
+
+ %2:gprc = nuw nsw ADDI %1, 1
+ %16:crrc = CMPLW %8, %2
+ BCC 76, killed %16, %bb.2
+ B %bb.3
+
+...
+
+# CHECK for enabling instruction hoisting
+#CHECK-LABEL: test
+#CHECK-HOIST: bb.1.for.body.lr.ph:
+#CHECK-HOIST: %14:g8rc = COPY $x2
+#CHECK-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-HOIST: %15:g8rc = EXTSW_32_64 %7
+#CHECK-HOIST: B %bb.3
+
+#CHECK-HOIST: bb.4.if.then:
+#CHECK-HOIST-NOT: %14:g8rc = COPY $x2
+#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
+#CHECK-HOIST: bb.5.for.inc:
+
+# CHECK for disabling instruction hoisting due to block hotness
+#CHECK-LABEL: test
+#CHECK-NO-HOIST: bb.1.for.body.lr.ph:
+#CHECK-NO-HOIST-NOT: %14:g8rc = COPY $x2
+#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-NO-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
+#CHECK-NO-HOIST: B %bb.3
+
+#CHECK-NO-HOIST: bb.4.if.then:
+#CHECK-NO-HOIST: %14:g8rc = COPY $x2
+#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-NO-HOIST: %15:g8rc = EXTSW_32_64 %7
+#CHECK-NO-HOIST: bb.5.for.inc:
+
More information about the llvm-commits
mailing list