[llvm] r292971 - Remove the load hoisting code of MLSM, it is completely subsumed by GVNHoist

Daniel Berlin via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 24 11:55:37 PST 2017


Author: dannyb
Date: Tue Jan 24 13:55:36 2017
New Revision: 292971

URL: http://llvm.org/viewvc/llvm-project?rev=292971&view=rev
Log:
Remove the load hoisting code of MLSM, it is completely subsumed by GVNHoist

Summary:
GVNHoist performs all the optimizations that MLSM does to loads, in a
more general way, and in a faster time bound (MLSM is N^3 in most
cases, N^4 in a few edge cases).

This disables the load portion.

Note that the way ld_hoist_st_sink.ll is written makes one think that
the loads should be moved to the while.preheader block, but

1. Neither MLSM nor GVNHoist do it (they both move them to identical places).

2. MLSM couldn't possibly do it anyway, as the while.preheader block
is not the head of the diamond, while.body is.  (GVNHoist could do it
if it was legal).

3. At a glance, it's not legal anyway because the in-loop load
conflict with the in-loop store, so the loads must stay in-loop.

I am happy to update the test to use update_test_checks so that
checking is tighter, just was going to do it as a followup.

Note that i can find no particular benefit to the store portion on any
real testcase/benchmark i have (even size-wise).  If we really still
want it, i am happy to commit to writing a targeted store sinker, just
taking the code from the MemorySSA port of MergedLoadStoreMotion
(which is N^2 worst case, and N most of the time).

We can do what it does in a much better time bound.

We also should be both hoisting and sinking stores, not just sinking
them, anyway, since whether we should hoist or sink to merge depends
basically on luck of the draw of where the blockers are placed.

Nonetheless, i have left it alone for now.

Reviewers: chandlerc, davide

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D29079

Added:
    llvm/trunk/test/Transforms/GVNHoist/ld_hoist1.ll
      - copied, changed from r292837, llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll
    llvm/trunk/test/Transforms/GVNHoist/ld_hoist_st_sink.ll
      - copied, changed from r292837, llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll
Removed:
    llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll
    llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp?rev=292971&r1=292970&r2=292971&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp Tue Jan 24 13:55:36 2017
@@ -19,6 +19,8 @@
 // thinks it safe to do so.  This optimization helps with eg. hiding load
 // latencies, triggering if-conversion, and reducing static code size.
 //
+// NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist.
+//
 //===----------------------------------------------------------------------===//
 //
 //
@@ -118,16 +120,6 @@ private:
   void removeInstruction(Instruction *Inst);
   BasicBlock *getDiamondTail(BasicBlock *BB);
   bool isDiamondHead(BasicBlock *BB);
-  // Routines for hoisting loads
-  bool isLoadHoistBarrierInRange(const Instruction &Start,
-                                 const Instruction &End, LoadInst *LI,
-                                 bool SafeToLoadUnconditionally);
-  LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
-  void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
-                        Instruction *ElseInst);
-  bool isSafeToHoist(Instruction *I) const;
-  bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
-  bool mergeLoads(BasicBlock *BB);
   // Routines for sinking stores
   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
@@ -188,169 +180,6 @@ bool MergedLoadStoreMotion::isDiamondHea
   return true;
 }
 
-///
-/// \brief True when instruction is a hoist barrier for a load
-///
-/// Whenever an instruction could possibly modify the value
-/// being loaded or protect against the load from happening
-/// it is considered a hoist barrier.
-///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
-    const Instruction &Start, const Instruction &End, LoadInst *LI,
-    bool SafeToLoadUnconditionally) {
-  if (!SafeToLoadUnconditionally)
-    for (const Instruction &Inst :
-         make_range(Start.getIterator(), End.getIterator()))
-      if (!isGuaranteedToTransferExecutionToSuccessor(&Inst))
-        return true;
-  MemoryLocation Loc = MemoryLocation::get(LI);
-  return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
-}
-
-///
-/// \brief Decide if a load can be hoisted
-///
-/// When there is a load in \p BB to the same address as \p LI
-/// and it can be hoisted from \p BB, return that load.
-/// Otherwise return Null.
-///
-LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
-                                                   LoadInst *Load0) {
-  BasicBlock *BB0 = Load0->getParent();
-  BasicBlock *Head = BB0->getSinglePredecessor();
-  bool SafeToLoadUnconditionally = isSafeToLoadUnconditionally(
-      Load0->getPointerOperand(), Load0->getAlignment(),
-      Load0->getModule()->getDataLayout(),
-      /*ScanFrom=*/Head->getTerminator());
-  for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
-       ++BBI) {
-    Instruction *Inst = &*BBI;
-
-    // Only merge and hoist loads when their result in used only in BB
-    auto *Load1 = dyn_cast<LoadInst>(Inst);
-    if (!Load1 || Inst->isUsedOutsideOfBlock(BB1))
-      continue;
-
-    MemoryLocation Loc0 = MemoryLocation::get(Load0);
-    MemoryLocation Loc1 = MemoryLocation::get(Load1);
-    if (Load0->isSameOperationAs(Load1) && AA->isMustAlias(Loc0, Loc1) &&
-        !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
-                                   SafeToLoadUnconditionally) &&
-        !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
-                                   SafeToLoadUnconditionally)) {
-      return Load1;
-    }
-  }
-  return nullptr;
-}
-
-///
-/// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into
-/// \p BB
-///
-/// BB is the head of a diamond
-///
-void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
-                                             Instruction *HoistCand,
-                                             Instruction *ElseInst) {
-  DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump();
-        dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n";
-        dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n");
-  // Hoist the instruction.
-  assert(HoistCand->getParent() != BB);
-
-  // Intersect optional metadata.
-  HoistCand->andIRFlags(ElseInst);
-  HoistCand->dropUnknownNonDebugMetadata();
-
-  // Prepend point for instruction insert
-  Instruction *HoistPt = BB->getTerminator();
-
-  // Merged instruction
-  Instruction *HoistedInst = HoistCand->clone();
-
-  // Hoist instruction.
-  HoistedInst->insertBefore(HoistPt);
-
-  HoistCand->replaceAllUsesWith(HoistedInst);
-  removeInstruction(HoistCand);
-  // Replace the else block instruction.
-  ElseInst->replaceAllUsesWith(HoistedInst);
-  removeInstruction(ElseInst);
-}
-
-///
-/// \brief Return true if no operand of \p I is defined in I's parent block
-///
-bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
-  BasicBlock *Parent = I->getParent();
-  for (Use &U : I->operands())
-    if (auto *Instr = dyn_cast<Instruction>(&U))
-      if (Instr->getParent() == Parent)
-        return false;
-  return true;
-}
-
-///
-/// \brief Merge two equivalent loads and GEPs and hoist into diamond head
-///
-bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
-                                      LoadInst *L1) {
-  // Only one definition?
-  auto *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
-  auto *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
-  if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
-      A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
-      A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
-      isa<GetElementPtrInst>(A0)) {
-    DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump();
-          dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n";
-          dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n");
-    hoistInstruction(BB, A0, A1);
-    hoistInstruction(BB, L0, L1);
-    return true;
-  }
-  return false;
-}
-
-///
-/// \brief Try to hoist two loads to same address into diamond header
-///
-/// Starting from a diamond head block, iterate over the instructions in one
-/// successor block and try to match a load in the second successor.
-///
-bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
-  bool MergedLoads = false;
-  assert(isDiamondHead(BB));
-  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
-  BasicBlock *Succ0 = BI->getSuccessor(0);
-  BasicBlock *Succ1 = BI->getSuccessor(1);
-  // #Instructions in Succ1 for Compile Time Control
-  int Size1 = Succ1->size();
-  int NLoads = 0;
-  for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
-       BBI != BBE;) {
-    Instruction *I = &*BBI;
-    ++BBI;
-
-    // Don't move non-simple (atomic, volatile) loads.
-    auto *L0 = dyn_cast<LoadInst>(I);
-    if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0))
-      continue;
-
-    ++NLoads;
-    if (NLoads * Size1 >= MagicCompileTimeControl)
-      break;
-    if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) {
-      bool Res = hoistLoad(BB, L0, L1);
-      MergedLoads |= Res;
-      // Don't attempt to hoist above loads that had not been hoisted.
-      if (!Res)
-        break;
-    }
-  }
-  return MergedLoads;
-}
 
 ///
 /// \brief True when instruction is a sink barrier for a store
@@ -534,7 +363,6 @@ bool MergedLoadStoreMotion::run(Function
     // Hoist equivalent loads and sink stores
     // outside diamonds when possible
     if (isDiamondHead(BB)) {
-      Changed |= mergeLoads(BB);
       Changed |= mergeStores(getDiamondTail(BB));
     }
   }

Copied: llvm/trunk/test/Transforms/GVNHoist/ld_hoist1.ll (from r292837, llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVNHoist/ld_hoist1.ll?p2=llvm/trunk/test/Transforms/GVNHoist/ld_hoist1.ll&p1=llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll&r1=292837&r2=292971&rev=292971&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll (original)
+++ llvm/trunk/test/Transforms/GVNHoist/ld_hoist1.ll Tue Jan 24 13:55:36 2017
@@ -1,5 +1,5 @@
 ; Test load hoist
-; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc_linux"
 

Copied: llvm/trunk/test/Transforms/GVNHoist/ld_hoist_st_sink.ll (from r292837, llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVNHoist/ld_hoist_st_sink.ll?p2=llvm/trunk/test/Transforms/GVNHoist/ld_hoist_st_sink.ll&p1=llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll&r1=292837&r2=292971&rev=292971&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll (original)
+++ llvm/trunk/test/Transforms/GVNHoist/ld_hoist_st_sink.ll Tue Jan 24 13:55:36 2017
@@ -1,6 +1,6 @@
 ; Tests to make sure that loads and stores in a diamond get merged
 ; Loads are hoisted into the header. Stores sunks into the footer.
-; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 }
@@ -41,7 +41,7 @@ if.then:
   %4 = load i64, i64* %p, align 8
   %add = add nsw i64 %4, %2
   %p1 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6
-; CHECK-NOT: store i64
+; FIXME: store i64
   store i64 %add, i64* %p1, align 8
   br label %if.end
 
@@ -61,13 +61,13 @@ if.else:
   %8 = load i64, i64* %cost5, align 8
   %sub = sub nsw i64 %6, %8
   %p6 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6
-; CHECK-NOT: store i64
+; FIXME: store i64
   store i64 %sub, i64* %p6, align 8
   br label %if.end
 
 ; CHECK: if.end
 if.end:                                           ; preds = %if.else, %if.then
-; CHECK: store
+; FIXME: store
   %inc = add nsw i64 %sum.019, 1
   %node.0.in = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 2
   %node.0 = load %struct.node*, %struct.node** %node.0.in, align 8

Removed: llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll?rev=292970&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll (original)
+++ llvm/trunk/test/Transforms/InstMerge/ld_hoist1.ll (removed)
@@ -1,64 +0,0 @@
-; Test load hoist
-; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc_linux"
-
-; Function Attrs: nounwind uwtable
-define float* @foo(i32* noalias nocapture readonly %in, float* noalias %out, i32 %size, i32* nocapture readonly %trigger)  {
-entry:
-  %cmp11 = icmp eq i32 %size, 0
-  br i1 %cmp11, label %for.end, label %for.body.lr.ph
-
-for.body.lr.ph:                                   ; preds = %entry
-  %0 = add i32 %size, -1
-  br label %for.body
-
-; CHECK-LABEL: for.body
-; CHECK: load
-; CHECK:  %2 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-; CHECK:  %3 = load i32, i32* %2, align 4
-
-for.body:                                         ; preds = %for.body.lr.ph, %for.inc
-  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx, align 4
-  %cmp1 = icmp sgt i32 %1, 0
-  br i1 %cmp1, label %if.then, label %if.else
-
-; CHECK-LABEL: if.then
-if.then:                                          ; preds = %for.body
-; This load should be hoisted
-  %arrayidx3 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx3, align 4
-  %conv = sitofp i32 %2 to float
-  %add = fadd float %conv, 5.000000e-01
-  %arrayidx5 = getelementptr inbounds float, float* %out, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
-  br label %for.inc
-
-if.else:                                          ; preds = %for.body
-  %arrayidx7 = getelementptr inbounds float, float* %out, i64 %indvars.iv
-  %3 = load float, float* %arrayidx7, align 4
-  %div = fdiv float %3, 3.000000e+00
-  store float %div, float* %arrayidx7, align 4
-; This load should be hoisted in spite of store 
-  %arrayidx9 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-  %4 = load i32, i32* %arrayidx9, align 4
-  %conv10 = sitofp i32 %4 to float
-  %add13 = fadd float %div, %conv10
-  store float %add13, float* %arrayidx7, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %if.then, %if.else
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %0
-  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge:                       ; preds = %for.inc
-  br label %for.end
-
-for.end:                                          ; preds = %entry, %for.cond.for.end_crit_edge
-  ret float* %out
-}
-

Removed: llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll?rev=292970&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll (original)
+++ llvm/trunk/test/Transforms/InstMerge/ld_hoist_st_sink.ll (removed)
@@ -1,84 +0,0 @@
-; Tests to make sure that loads and stores in a diamond get merged
-; Loads are hoisted into the header. Stores sunks into the footer.
-; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-
-%struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 }
-%struct.arc = type { i64, i64, i64 }
-
-define i64 @foo(%struct.node* nocapture readonly %r) nounwind {
-entry:
-  %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2
-  %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8
-  %tobool18 = icmp eq %struct.node* %node.017, null
-  br i1 %tobool18, label %while.end, label %while.body.preheader
-
-; CHECK-LABEL: while.body.preheader
-while.body.preheader:                             ; preds = %entry
-; CHECK: load
-  br label %while.body
-
-while.body:                                       ; preds = %while.body.preheader, %if.end
-  %node.020 = phi %struct.node* [ %node.0, %if.end ], [ %node.017, %while.body.preheader ]
-  %sum.019 = phi i64 [ %inc, %if.end ], [ 0, %while.body.preheader ]
-  %orientation = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 4
-  %0 = load i64, i64* %orientation, align 8
-  %cmp = icmp eq i64 %0, 1
-  br i1 %cmp, label %if.then, label %if.else
-; CHECK: if.then
-if.then:                                          ; preds = %while.body
-  %a = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 5
-; CHECK-NOT: load %struct.arc
-  %1 = load %struct.arc*, %struct.arc** %a, align 8
-  %cost = getelementptr inbounds %struct.arc, %struct.arc* %1, i64 0, i32 0
-; CHECK-NOT: load i64, i64*
-  %2 = load i64, i64* %cost, align 8
-  %pred = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 1
-; CHECK-NOT: load %struct.node*, %struct.node**
-  %3 = load %struct.node*, %struct.node** %pred, align 8
-  %p = getelementptr inbounds %struct.node, %struct.node* %3, i64 0, i32 6
-; CHECK-NOT: load i64, i64*
-  %4 = load i64, i64* %p, align 8
-  %add = add nsw i64 %4, %2
-  %p1 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6
-; CHECK-NOT: store i64
-  store i64 %add, i64* %p1, align 8
-  br label %if.end
-
-; CHECK: if.else
-if.else:                                          ; preds = %while.body
-  %pred2 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 1
-; CHECK-NOT: load %struct.node*, %struct.node**
-  %5 = load %struct.node*, %struct.node** %pred2, align 8
-  %p3 = getelementptr inbounds %struct.node, %struct.node* %5, i64 0, i32 6
-; CHECK-NOT: load i64, i64*
-  %6 = load i64, i64* %p3, align 8
-  %a4 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 5
-; CHECK-NOT: load %struct.arc*, %struct.arc**
-  %7 = load %struct.arc*, %struct.arc** %a4, align 8
-  %cost5 = getelementptr inbounds %struct.arc, %struct.arc* %7, i64 0, i32 0
-; CHECK-NOT: load i64, i64*
-  %8 = load i64, i64* %cost5, align 8
-  %sub = sub nsw i64 %6, %8
-  %p6 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6
-; CHECK-NOT: store i64
-  store i64 %sub, i64* %p6, align 8
-  br label %if.end
-
-; CHECK: if.end
-if.end:                                           ; preds = %if.else, %if.then
-; CHECK: store
-  %inc = add nsw i64 %sum.019, 1
-  %node.0.in = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 2
-  %node.0 = load %struct.node*, %struct.node** %node.0.in, align 8
-  %tobool = icmp eq %struct.node* %node.0, null
-  br i1 %tobool, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:                               ; preds = %if.end
-  %inc.lcssa = phi i64 [ %inc, %if.end ]
-  br label %while.end
-
-while.end:                                        ; preds = %while.end.loopexit, %entry
-  %sum.0.lcssa = phi i64 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
-  ret i64 %sum.0.lcssa
-}




More information about the llvm-commits mailing list