[llvm] edab7dd - Disable hoisting MI to hotter basic blocks

Victor Huang via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 11 13:36:22 PST 2019


Author: Victor Huang
Date: 2019-11-11T21:32:56Z
New Revision: edab7dd426249bd40059b49b255ba9cc5b784753

URL: https://github.com/llvm/llvm-project/commit/edab7dd426249bd40059b49b255ba9cc5b784753
DIFF: https://github.com/llvm/llvm-project/commit/edab7dd426249bd40059b49b255ba9cc5b784753.diff

LOG: Disable hoisting MI to hotter basic blocks

The current Hoist() function of the machine LICM pass does not check the frequencies of the source and destination basic blocks that an instruction is hoisted from/to.
As a result, an instruction may be hoisted from a cold basic block to a hot one.

In this patch, we add options to disable machine instruction hoisting if the destination block is hotter.

Differential Revision: https://reviews.llvm.org/D63676

Added: 
    llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir
    llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir

Modified: 
    llvm/lib/CodeGen/MachineLICM.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 6a898ff6ef88..194125feea0d 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -74,6 +75,27 @@ static cl::opt<bool>
 HoistConstStores("hoist-const-stores",
                  cl::desc("Hoist invariant stores"),
                  cl::init(true), cl::Hidden);
+// The default threshold of 100 (i.e. if target block is 100 times hotter)
+// is based on empirical data on a single target and is subject to tuning.
+static cl::opt<unsigned>
+BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
+                             cl::desc("Do not hoist instructions if target "
+                             "block is N times hotter than the source."),
+                             cl::init(100), cl::Hidden);
+
+enum class UseBFI { None, PGO, All }; // Guard off / PGO-only / always.
+
+static cl::opt<UseBFI>
+DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
+                              cl::desc("Disable hoisting instructions to"
+                              " hotter blocks"),
+                              cl::init(UseBFI::None), cl::Hidden,
+                              cl::values(clEnumValN(UseBFI::None, "none",
+                              "disable the feature"),
+                              clEnumValN(UseBFI::PGO, "pgo",
+                              "enable the feature when using profile data"),
+                              clEnumValN(UseBFI::All, "all",
+                              "enable the feature with/wo profile data")));
 
 STATISTIC(NumHoisted,
           "Number of machine instructions hoisted out of loops");
@@ -87,6 +109,8 @@ STATISTIC(NumPostRAHoisted,
           "Number of machine instructions hoisted out of loops post regalloc");
 STATISTIC(NumStoreConst,
           "Number of stores of const phys reg hoisted out of loops");
+STATISTIC(NumNotHoistedDueToHotness,
+          "Number of instructions not hoisted due to block frequency");
 
 namespace {
 
@@ -98,9 +122,11 @@ namespace {
     MachineRegisterInfo *MRI;
     TargetSchedModel SchedModel;
     bool PreRegAlloc;
+    bool HasProfileData;
 
     // Various analyses that we use...
     AliasAnalysis        *AA;      // Alias analysis info.
+    MachineBlockFrequencyInfo *MBFI; // Machine block frequency info
     MachineLoopInfo      *MLI;     // Current MachineLoopInfo
     MachineDominatorTree *DT;      // Machine dominator tree for the cur loop
 
@@ -150,6 +176,8 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<MachineLoopInfo>();
+      if (DisableHoistingToHotterBlocks != UseBFI::None)
+        AU.addRequired<MachineBlockFrequencyInfo>();
       AU.addRequired<MachineDominatorTree>();
       AU.addRequired<AAResultsWrapperPass>();
       AU.addPreserved<MachineLoopInfo>();
@@ -245,6 +273,8 @@ namespace {
 
     void InitCSEMap(MachineBasicBlock *BB);
 
+    bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+                            MachineBasicBlock *TgtBlock);
     MachineBasicBlock *getCurPreheader();
   };
 
@@ -275,6 +305,7 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
 INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
                       "Machine Loop Invariant Code Motion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
@@ -283,6 +314,7 @@ INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
 INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
                       "Early Machine Loop Invariant Code Motion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
@@ -315,6 +347,7 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
   SchedModel.init(&ST);
 
   PreRegAlloc = MRI->isSSA();
+  HasProfileData = MF.getFunction().hasProfileData();
 
   if (PreRegAlloc)
     LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
@@ -333,6 +366,8 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
   }
 
   // Get our Loop information...
+  if (DisableHoistingToHotterBlocks != UseBFI::None)
+    MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   MLI = &getAnalysis<MachineLoopInfo>();
   DT  = &getAnalysis<MachineDominatorTree>();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -1433,6 +1468,15 @@ bool MachineLICMBase::MayCSE(MachineInstr *MI) {
 /// that are safe to hoist, this instruction is called to do the dirty work.
 /// It returns true if the instruction is hoisted.
 bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+  MachineBasicBlock *SrcBlock = MI->getParent();
+
+  // Disable the instruction hoisting due to block hotness
+  if ((DisableHoistingToHotterBlocks == UseBFI::All ||
+      (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
+      isTgtHotterThanSrc(SrcBlock, Preheader)) {
+    ++NumNotHoistedDueToHotness;
+    return false;
+  }
   // First check whether we should hoist this instruction.
   if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
     // If not, try unfolding a hoistable load.
@@ -1526,3 +1570,21 @@ MachineBasicBlock *MachineLICMBase::getCurPreheader() {
   }
   return CurPreheader;
 }
+
+/// Return true when hoisting from \p SrcBlock into \p TgtBlock should be
+/// blocked because the target is more than "BlockFrequencyRatioThreshold"
+/// times hotter than the source, or because the source frequency is zero.
+bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+                                         MachineBasicBlock *TgtBlock) {
+  // Query MBFI for the raw frequency of each block.
+  const uint64_t SourceFreq = MBFI->getBlockFreq(SrcBlock).getFrequency();
+  const uint64_t TargetFreq = MBFI->getBlockFreq(TgtBlock).getFrequency();
+
+  // With a zero source frequency no ratio can be formed: refuse to hoist.
+  if (SourceFreq == 0)
+    return true;
+
+  // Strictly-greater comparison of the floating-point frequency ratio.
+  const double Threshold = BlockFrequencyRatioThreshold;
+  return static_cast<double>(TargetFreq) / SourceFreq > Threshold;
+}

diff  --git a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir
new file mode 100644
index 000000000000..84afa3f09e1e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir
@@ -0,0 +1,190 @@
+# NOTE: This test verifies disable/enable instruction hoisting to hot blocks based on non-profile data
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=all -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-NO-HOIST
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=all -block-freq-ratio-threshold=100000000 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=pgo -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=none -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+
+--- |
+  target datalayout = "e-m:e-i64:64-n32:64"
+
+  define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 signext %Len, i32* nocapture %Ptr) {
+  entry:
+    tail call void asm sideeffect "#NOTHING", "~{r2}"()
+    %cmp6 = icmp sgt i32 %Len, 0
+    br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
+
+  for.body.lr.ph:                                   ; preds = %entry
+    %cmp1 = icmp sgt i32 %Arg, 10
+    br label %for.body
+
+  for.cond.cleanup:                                 ; preds = %for.inc, %entry
+    ret void
+
+  for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+    %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+    %0 = load i32, i32* %Ptr, align 4
+    %1 = add i32 %i.07, %0
+    store i32 %1, i32* %Ptr, align 4
+    br i1 %cmp1, label %if.then, label %for.inc
+
+  if.then:                                          ; preds = %for.body
+    tail call void asm sideeffect "#NOTHING", "~{r2}"()
+    tail call void %fp(i32 signext %Arg)
+    br label %for.inc
+
+  for.inc:                                          ; preds = %if.then, %for.body
+    %inc = add nuw nsw i32 %i.07, 1
+    %exitcond = icmp eq i32 %Len, %inc
+    br i1 %exitcond, label %for.cond.cleanup, label %for.body
+  }
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #0
+
+  attributes #0 = { nounwind }
+
+...
+---
+name:            test
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: crbitrc, preferred-register: '' }
+  - { id: 1, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 2, class: gprc, preferred-register: '' }
+  - { id: 3, class: g8rc, preferred-register: '' }
+  - { id: 4, class: g8rc, preferred-register: '' }
+  - { id: 5, class: g8rc, preferred-register: '' }
+  - { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 7, class: gprc, preferred-register: '' }
+  - { id: 8, class: gprc, preferred-register: '' }
+  - { id: 9, class: crrc, preferred-register: '' }
+  - { id: 10, class: gprc, preferred-register: '' }
+  - { id: 11, class: crrc, preferred-register: '' }
+  - { id: 12, class: gprc, preferred-register: '' }
+  - { id: 13, class: gprc, preferred-register: '' }
+  - { id: 14, class: g8rc, preferred-register: '' }
+  - { id: 15, class: g8rc, preferred-register: '' }
+  - { id: 16, class: crrc, preferred-register: '' }
+liveins:
+  - { reg: '$x3', virtual-reg: '%3' }
+  - { reg: '$x4', virtual-reg: '%4' }
+  - { reg: '$x5', virtual-reg: '%5' }
+  - { reg: '$x6', virtual-reg: '%6' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x7ecade30), %bb.2(0x013521d0)
+    liveins: $x3, $x4, $x5, $x6
+
+    %6:g8rc_and_g8rc_nox0 = COPY $x6
+    %5:g8rc = COPY $x5
+    %4:g8rc = COPY $x4
+    %3:g8rc = COPY $x3
+    %7:gprc = COPY %4.sub_32
+    %8:gprc = COPY %5.sub_32
+    INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2
+    %9:crrc = CMPWI %8, 1
+    BCC 12, killed %9, %bb.2
+    B %bb.1
+
+  bb.1.for.body.lr.ph:
+    successors: %bb.3(0x80000000)
+
+    %11:crrc = CMPWI %7, 10
+    %0:crbitrc = COPY %11.sub_gt
+    %10:gprc = LI 0
+    B %bb.3
+
+  bb.2.for.cond.cleanup:
+    BLR8 implicit $lr8, implicit $rm
+
+  bb.3.for.body:
+    successors: %bb.4(0x00000002), %bb.5(0x7ffffffe)
+
+    %1:gprc_and_gprc_nor0 = PHI %10, %bb.1, %2, %bb.5
+    %12:gprc = LWZ 0, %6 :: (load 4 from %ir.Ptr)
+    %13:gprc = ADD4 %1, killed %12
+    STW killed %13, 0, %6 :: (store 4 into %ir.Ptr)
+    BCn %0, %bb.5
+    B %bb.4
+
+  bb.4.if.then:
+    successors: %bb.5(0x80000000)
+
+    INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2
+    ADJCALLSTACKDOWN 32, 0, implicit-def dead $r1, implicit $r1
+    %14:g8rc = COPY $x2
+    STD %14, 24, $x1 :: (store 8 into stack + 24)
+    %15:g8rc = EXTSW_32_64 %7
+    $x3 = COPY %15
+    $x12 = COPY %3
+    MTCTR8 %3, implicit-def $ctr8
+    BCTRL8_LDinto_toc 24, $x1, csr_svr464_altivec, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x3, implicit $x12, implicit $x2, implicit-def $r1
+    ADJCALLSTACKUP 32, 0, implicit-def dead $r1, implicit $r1
+
+  bb.5.for.inc:
+    successors: %bb.2(0x013521d0), %bb.3(0x7ecade30)
+
+    %2:gprc = nuw nsw ADDI %1, 1
+    %16:crrc = CMPLW %8, %2
+    BCC 76, killed %16, %bb.2
+    B %bb.3
+
+...
+
+# CHECK for enabling instruction hoisting
+#CHECK-LABEL: test
+#CHECK-HOIST: bb.1.for.body.lr.ph:
+#CHECK-HOIST: %14:g8rc = COPY $x2
+#CHECK-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-HOIST: %15:g8rc = EXTSW_32_64 %7
+#CHECK-HOIST: B %bb.3
+
+#CHECK-HOIST: bb.4.if.then:
+#CHECK-HOIST-NOT: %14:g8rc = COPY $x2
+#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
+#CHECK-HOIST: bb.5.for.inc:
+
+# CHECK for disabling instruction hoisting due to block hotness
+#CHECK-LABEL: test
+#CHECK-NO-HOIST: bb.1.for.body.lr.ph:
+#CHECK-NO-HOIST-NOT: %14:g8rc = COPY $x2
+#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-NO-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
+#CHECK-NO-HOIST: B %bb.3
+
+#CHECK-NO-HOIST: bb.4.if.then:
+#CHECK-NO-HOIST: %14:g8rc = COPY $x2
+#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-NO-HOIST: %15:g8rc = EXTSW_32_64 %7
+#CHECK-NO-HOIST: bb.5.for.inc:

diff  --git a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir
new file mode 100644
index 000000000000..a9b65574066e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir
@@ -0,0 +1,237 @@
+# NOTE: This test verifies disable/enable instruction hoisting to hot blocks based on profile data
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=pgo -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-NO-HOIST
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=pgo -block-freq-ratio-threshold=100000000 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=none -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
+
+--- |
+  target datalayout = "e-m:e-i64:64-n32:64"
+  target triple = "powerpc64le-unknown-linux-gnu"
+
+  ; Function Attrs: nounwind
+  define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 signext %Len, i32* nocapture %Ptr) local_unnamed_addr #0 !prof !29 !section_prefix !30 {
+  entry:
+    tail call void asm sideeffect "#NOTHING", "~{r2}"() #1, !srcloc !31
+    %cmp6 = icmp sgt i32 %Len, 0
+    br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup, !prof !32
+
+  for.body.lr.ph:                                   ; preds = %entry
+    %cmp1 = icmp sgt i32 %Arg, 10
+    br label %for.body
+
+  for.cond.cleanup:                                 ; preds = %for.inc, %entry
+    ret void
+
+  for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+    %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+    %0 = load i32, i32* %Ptr, align 4, !tbaa !33
+    %1 = add i32 %i.07, %0
+    store i32 %1, i32* %Ptr, align 4, !tbaa !33
+    br i1 %cmp1, label %if.then, label %for.inc, !prof !37
+
+  if.then:                                          ; preds = %for.body
+    tail call void asm sideeffect "#NOTHING", "~{r2}"() #1, !srcloc !31
+    tail call void %fp(i32 signext %Arg) #1, !prof !38
+    br label %for.inc
+
+  for.inc:                                          ; preds = %if.then, %for.body
+    %inc = add nuw nsw i32 %i.07, 1
+    %exitcond = icmp eq i32 %Len, %inc
+    br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !39
+  }
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+
+  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { nounwind }
+
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!28}
+
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"ProfileSummary", !2}
+  !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+  !3 = !{!"ProfileFormat", !"InstrProf"}
+  !4 = !{!"TotalCount", i64 25405000087}
+  !5 = !{!"MaxCount", i64 21000000020}
+  !6 = !{!"MaxInternalCount", i64 200000003}
+  !7 = !{!"MaxFunctionCount", i64 21000000020}
+  !8 = !{!"NumCounts", i64 15}
+  !9 = !{!"NumFunctions", i64 7}
+  !10 = !{!"DetailedSummary", !11}
+  !11 = !{!12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27}
+  !12 = !{i32 10000, i64 21000000020, i32 1}
+  !13 = !{i32 100000, i64 21000000020, i32 1}
+  !14 = !{i32 200000, i64 21000000020, i32 1}
+  !15 = !{i32 300000, i64 21000000020, i32 1}
+  !16 = !{i32 400000, i64 21000000020, i32 1}
+  !17 = !{i32 500000, i64 21000000020, i32 1}
+  !18 = !{i32 600000, i64 21000000020, i32 1}
+  !19 = !{i32 700000, i64 21000000020, i32 1}
+  !20 = !{i32 800000, i64 21000000020, i32 1}
+  !21 = !{i32 900000, i64 4203000000, i32 2}
+  !22 = !{i32 950000, i64 4203000000, i32 2}
+  !23 = !{i32 990000, i64 4203000000, i32 2}
+  !24 = !{i32 999000, i64 200000003, i32 3}
+  !25 = !{i32 999900, i64 200000003, i32 3}
+  !26 = !{i32 999990, i64 2000000, i32 4}
+  !27 = !{i32 999999, i64 2000000, i32 4}
+  !28 = !{!"clang version 9.0.0 (git at github.ibm.com:compiler/llvm-project.git 01fc2fc8e690ee427cab149cb0bfd63568bed89b)"}
+  !29 = !{!"function_entry_count", i64 200000003}
+  !30 = !{!"function_section_prefix", !".hot"}
+  !31 = !{i32 65}
+  !32 = !{!"branch_weights", i32 -94967292, i32 40000000}
+  !33 = !{!34, !34, i64 0}
+  !34 = !{!"int", !35, i64 0}
+  !35 = !{!"omnipotent char", !36, i64 0}
+  !36 = !{!"Simple C/C++ TBAA"}
+  !37 = !{!"branch_weights", i32 4, i32 -94967296}
+  !38 = !{!"VP", i32 0, i64 20, i64 -3706093650706652785, i64 20}
+  !39 = !{!"branch_weights", i32 40000000, i32 -94967292}
+
+...
+---
+name:            test
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: crbitrc, preferred-register: '' }
+  - { id: 1, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 2, class: gprc, preferred-register: '' }
+  - { id: 3, class: g8rc, preferred-register: '' }
+  - { id: 4, class: g8rc, preferred-register: '' }
+  - { id: 5, class: g8rc, preferred-register: '' }
+  - { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 7, class: gprc, preferred-register: '' }
+  - { id: 8, class: gprc, preferred-register: '' }
+  - { id: 9, class: crrc, preferred-register: '' }
+  - { id: 10, class: gprc, preferred-register: '' }
+  - { id: 11, class: crrc, preferred-register: '' }
+  - { id: 12, class: gprc, preferred-register: '' }
+  - { id: 13, class: gprc, preferred-register: '' }
+  - { id: 14, class: g8rc, preferred-register: '' }
+  - { id: 15, class: g8rc, preferred-register: '' }
+  - { id: 16, class: crrc, preferred-register: '' }
+liveins:
+  - { reg: '$x3', virtual-reg: '%3' }
+  - { reg: '$x4', virtual-reg: '%4' }
+  - { reg: '$x5', virtual-reg: '%5' }
+  - { reg: '$x6', virtual-reg: '%6' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x7ecade30), %bb.2(0x013521d0)
+    liveins: $x3, $x4, $x5, $x6
+
+    %6:g8rc_and_g8rc_nox0 = COPY $x6
+    %5:g8rc = COPY $x5
+    %4:g8rc = COPY $x4
+    %3:g8rc = COPY $x3
+    %7:gprc = COPY %4.sub_32
+    %8:gprc = COPY %5.sub_32
+    INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2, !31
+    %9:crrc = CMPWI %8, 1
+    BCC 12, killed %9, %bb.2
+    B %bb.1
+
+  bb.1.for.body.lr.ph:
+    successors: %bb.3(0x80000000)
+
+    %11:crrc = CMPWI %7, 10
+    %0:crbitrc = COPY %11.sub_gt
+    %10:gprc = LI 0
+    B %bb.3
+
+  bb.2.for.cond.cleanup:
+    BLR8 implicit $lr8, implicit $rm
+
+  bb.3.for.body:
+    successors: %bb.4(0x00000002), %bb.5(0x7ffffffe)
+
+    %1:gprc_and_gprc_nor0 = PHI %10, %bb.1, %2, %bb.5
+    %12:gprc = LWZ 0, %6 :: (load 4 from %ir.Ptr, !tbaa !33)
+    %13:gprc = ADD4 %1, killed %12
+    STW killed %13, 0, %6 :: (store 4 into %ir.Ptr, !tbaa !33)
+    BCn %0, %bb.5
+    B %bb.4
+
+  bb.4.if.then:
+    successors: %bb.5(0x80000000)
+
+    INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2, !31
+    ADJCALLSTACKDOWN 32, 0, implicit-def dead $r1, implicit $r1
+    %14:g8rc = COPY $x2
+    STD %14, 24, $x1 :: (store 8 into stack + 24)
+    %15:g8rc = EXTSW_32_64 %7
+    $x3 = COPY %15
+    $x12 = COPY %3
+    MTCTR8 %3, implicit-def $ctr8
+    BCTRL8_LDinto_toc 24, $x1, csr_svr464_altivec, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x3, implicit $x12, implicit $x2, implicit-def $r1
+    ADJCALLSTACKUP 32, 0, implicit-def dead $r1, implicit $r1
+
+  bb.5.for.inc:
+    successors: %bb.2(0x013521d0), %bb.3(0x7ecade30)
+
+    %2:gprc = nuw nsw ADDI %1, 1
+    %16:crrc = CMPLW %8, %2
+    BCC 76, killed %16, %bb.2
+    B %bb.3
+
+...
+
+# CHECK for enabling instruction hoisting
+#CHECK-LABEL: test
+#CHECK-HOIST: bb.1.for.body.lr.ph:
+#CHECK-HOIST: %14:g8rc = COPY $x2
+#CHECK-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-HOIST: %15:g8rc = EXTSW_32_64 %7
+#CHECK-HOIST: B %bb.3
+
+#CHECK-HOIST: bb.4.if.then:
+#CHECK-HOIST-NOT: %14:g8rc = COPY $x2
+#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
+#CHECK-HOIST: bb.5.for.inc:
+
+# CHECK for disabling instruction hoisting due to block hotness
+#CHECK-LABEL: test
+#CHECK-NO-HOIST: bb.1.for.body.lr.ph:
+#CHECK-NO-HOIST-NOT: %14:g8rc = COPY $x2
+#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-NO-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
+#CHECK-NO-HOIST: B %bb.3
+
+#CHECK-NO-HOIST: bb.4.if.then:
+#CHECK-NO-HOIST: %14:g8rc = COPY $x2
+#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
+#CHECK-NO-HOIST: %15:g8rc = EXTSW_32_64 %7
+#CHECK-NO-HOIST: bb.5.for.inc:
+


        


More information about the llvm-commits mailing list