[llvm] 7f6360c - Reland: Expand existing loopsink testing to also test loopsinking using new pass manager and fix LICM bug.

Jamie Schmeiser via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 20 07:27:20 PST 2020


Author: Jamie Schmeiser
Date: 2020-11-20T10:26:33-05:00
New Revision: 7f6360cdc68e56187eeed71adaf20c7feb12d235

URL: https://github.com/llvm/llvm-project/commit/7f6360cdc68e56187eeed71adaf20c7feb12d235
DIFF: https://github.com/llvm/llvm-project/commit/7f6360cdc68e56187eeed71adaf20c7feb12d235.diff

LOG: Reland: Expand existing loopsink testing to also test loopsinking using new pass manager and fix LICM bug.
Summary:
Expand existing loopsink testing to also test loopsinking using new pass
manager.  Enable memoryssa for loopsink with new pass manager.  This
combination exposed a bug that was previously fixed for loopsink
without memoryssa.  When sinking an instruction into a loop, the source
block may not be part of the loop but still needs to be checked for
pointer invalidation.  This is the fix for bugzilla #39695 (PR 54659)
expanded to also work with memoryssa.

Respond to review comments.  Enable Memory SSA in legacy Loop Sink pass
under EnableMSSALoopDependency option control.  Update tests accordingly.

Respond to review comments.  Add options controlling whether memoryssa is
used for loop sink, defaulting to off.  Expand testing based on these
options.

Respond to review comments.  Properly indicated preserved analyses.

This relanding addresses a compile-time performance problem by moving
test for profile data earlier to avoid unnecessary computations.

Author: Jamie Schmeiser <schmeise at ca.ibm.com>
Reviewed By: asbirlea (Alina Sbirlea)
Differential Revision: https://reviews.llvm.org/D90249

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LICM.cpp
    llvm/lib/Transforms/Scalar/LoopSink.cpp
    llvm/test/Transforms/LICM/loopsink-pr38462.ll
    llvm/test/Transforms/LICM/loopsink-pr39570.ll
    llvm/test/Transforms/LICM/loopsink-pr39695.ll
    llvm/test/Transforms/LICM/loopsink.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index e64ec4bf6671..1885d1d9f557 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -163,8 +163,10 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                      AliasSetTracker *CurAST, Loop *CurLoop,
                                      AAResults *AA);
 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
-                                             Loop *CurLoop,
+                                             Loop *CurLoop, Instruction &I,
                                              SinkAndHoistLICMFlags &Flags);
+static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
+                                              MemoryUse &MU);
 static Instruction *cloneInstructionInExitBlock(
     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
@@ -1155,7 +1157,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                                              CurLoop, AA);
     else
       Invalidated = pointerInvalidatedByLoopWithMSSA(
-          MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
+          MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
     // Check loop-invariant address because this may also be a sinkable load
     // whose address is not necessarily loop-invariant.
     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1211,7 +1213,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                   CurAST, CurLoop, AA);
             else
               Invalidated = pointerInvalidatedByLoopWithMSSA(
-                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
+                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
                   *Flags);
             if (Invalidated)
               return false;
@@ -1312,7 +1314,6 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
               }
             }
         }
-
       auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
       Flags->incrementClobberingCalls();
       // If there are no clobbering Defs in the loop, store is safe to hoist.
@@ -2285,7 +2286,7 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
 }
 
 bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
-                                      Loop *CurLoop,
+                                      Loop *CurLoop, Instruction &I,
                                       SinkAndHoistLICMFlags &Flags) {
   // For hoisting, use the walker to determine safety
   if (!Flags.getIsSink()) {
@@ -2321,12 +2322,22 @@ bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
   if (Flags.tooManyMemoryAccesses())
     return true;
   for (auto *BB : CurLoop->getBlocks())
-    if (auto *Accesses = MSSA->getBlockDefs(BB))
-      for (const auto &MA : *Accesses)
-        if (const auto *MD = dyn_cast<MemoryDef>(&MA))
-          if (MU->getBlock() != MD->getBlock() ||
-              !MSSA->locallyDominates(MD, MU))
-            return true;
+    if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
+      return true;
+  // When sinking, the source block may not be part of the loop so check it.
+  if (!CurLoop->contains(&I))
+    return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
+
+  return false;
+}
+
+bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
+                                       MemoryUse &MU) {
+  if (const auto *Accesses = MSSA.getBlockDefs(&BB))
+    for (const auto &MA : *Accesses)
+      if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+        if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
+          return true;
   return false;
 }
 

diff  --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 1c03a4bf6c02..bc4d6bbd3ebe 100644
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -39,6 +39,8 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/IR/Dominators.h"
@@ -67,6 +69,14 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
     "max-uses-for-sinking", cl::Hidden, cl::init(30),
     cl::desc("Do not sink instructions that have too many uses."));
 
+static cl::opt<bool> EnableMSSAInLoopSink(
+    "enable-mssa-in-loop-sink", cl::Hidden, cl::init(false),
+    cl::desc("Enable MemorySSA for LoopSink in new pass manager"));
+
+static cl::opt<bool> EnableMSSAInLegacyLoopSink(
+    "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false),
+    cl::desc("Enable MemorySSA for LoopSink in legacy pass manager"));
+
 /// Return adjusted total frequency of \p BBs.
 ///
 /// * If there is only one BB, sinking instruction will not introduce code
@@ -172,11 +182,10 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
 // sinking is successful.
 // \p LoopBlockNumber is used to sort the insertion blocks to ensure
 // determinism.
-static bool sinkInstruction(Loop &L, Instruction &I,
-                            const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
-                            const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber,
-                            LoopInfo &LI, DominatorTree &DT,
-                            BlockFrequencyInfo &BFI) {
+static bool sinkInstruction(
+    Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
+    const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI,
+    DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) {
   // Compute the set of blocks in loop L which contain a use of I.
   SmallPtrSet<BasicBlock *, 2> BBs;
   for (auto &U : I.uses()) {
@@ -230,6 +239,21 @@ static bool sinkInstruction(Loop &L, Instruction &I,
     Instruction *IC = I.clone();
     IC->setName(I.getName());
     IC->insertBefore(&*N->getFirstInsertionPt());
+
+    if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+      // Create a new MemoryAccess and let MemorySSA set its defining access.
+      MemoryAccess *NewMemAcc =
+          MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning);
+      if (NewMemAcc) {
+        if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
+          MSSAU->insertDef(MemDef, /*RenameUses=*/true);
+        else {
+          auto *MemUse = cast<MemoryUse>(NewMemAcc);
+          MSSAU->insertUse(MemUse, /*RenameUses=*/true);
+        }
+      }
+    }
+
     // Replaces uses of I with IC in N
     I.replaceUsesWithIf(IC, [N](Use &U) {
       return cast<Instruction>(U.getUser())->getParent() == N;
@@ -244,6 +268,11 @@ static bool sinkInstruction(Loop &L, Instruction &I,
   NumLoopSunk++;
   I.moveBefore(&*MoveBB->getFirstInsertionPt());
 
+  if (MSSAU)
+    if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+            MSSAU->getMemorySSA()->getMemoryAccess(&I)))
+      MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning);
+
   return true;
 }
 
@@ -252,15 +281,14 @@ static bool sinkInstruction(Loop &L, Instruction &I,
 static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
                                           DominatorTree &DT,
                                           BlockFrequencyInfo &BFI,
-                                          ScalarEvolution *SE) {
+                                          ScalarEvolution *SE,
+                                          AliasSetTracker *CurAST,
+                                          MemorySSA *MSSA) {
   BasicBlock *Preheader = L.getLoopPreheader();
-  if (!Preheader)
-    return false;
+  assert(Preheader && "Expected loop to have preheader");
 
-  // Enable LoopSink only when runtime profile is available.
-  // With static profile, the sinking decision may be sub-optimal.
-  if (!Preheader->getParent()->hasProfileData())
-    return false;
+  assert(Preheader->getParent()->hasProfileData() &&
+         "Unexpected call when profile data unavailable.");
 
   const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
   // If there are no basic blocks with lower frequency than the preheader then
@@ -271,13 +299,15 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
       }))
     return false;
 
-  bool Changed = false;
-  AliasSetTracker CurAST(AA);
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags;
+  if (MSSA) {
+    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+    LICMFlags =
+        std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA);
+  }
 
-  // Compute alias set.
-  for (BasicBlock *BB : L.blocks())
-    CurAST.add(*BB);
-  CurAST.add(*Preheader);
+  bool Changed = false;
 
   // Sort loop's basic blocks by frequency
   SmallVector<BasicBlock *, 10> ColdLoopBBs;
@@ -300,9 +330,11 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
     // No need to check for instruction's operands are loop invariant.
     assert(L.hasLoopInvariantOperands(I) &&
            "Insts in a loop's preheader should have loop invariant operands!");
-    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
+    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
+                            LICMFlags.get()))
       continue;
-    if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
+    if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
+                        MSSAU.get()))
       Changed = true;
   }
 
@@ -311,6 +343,13 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
   return Changed;
 }
 
+static void computeAliasSet(Loop &L, BasicBlock &Preheader,
+                            AliasSetTracker &CurAST) {
+  for (BasicBlock *BB : L.blocks())
+    CurAST.add(*BB);
+  CurAST.add(Preheader);
+}
+
 PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
   LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
   // Nothing to do if there are no loops.
@@ -321,6 +360,10 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
   BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
 
+  MemorySSA *MSSA = EnableMSSAInLoopSink
+                        ? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA()
+                        : nullptr;
+
   // We want to do a postorder walk over the loops. Since loops are a tree this
   // is equivalent to a reversed preorder walk and preorder is easy to compute
   // without recursion. Since we reverse the preorder, we will visit siblings
@@ -332,11 +375,27 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
   do {
     Loop &L = *PreorderLoops.pop_back_val();
 
+    BasicBlock *Preheader = L.getLoopPreheader();
+    if (!Preheader)
+      continue;
+
+    // Enable LoopSink only when runtime profile is available.
+    // With static profile, the sinking decision may be sub-optimal.
+    if (!Preheader->getParent()->hasProfileData())
+      continue;
+
+    std::unique_ptr<AliasSetTracker> CurAST;
+    if (!EnableMSSAInLoopSink) {
+      CurAST = std::make_unique<AliasSetTracker>(AA);
+      computeAliasSet(L, *Preheader, *CurAST.get());
+    }
+
     // Note that we don't pass SCEV here because it is only used to invalidate
     // loops in SCEV and we don't preserve (or request) SCEV at all making that
     // unnecessary.
     Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI,
-                                             /*ScalarEvolution*/ nullptr);
+                                             /*ScalarEvolution*/ nullptr,
+                                             CurAST.get(), MSSA);
   } while (!PreorderLoops.empty());
 
   if (!Changed)
@@ -344,6 +403,14 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
 
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
+
+  if (MSSA) {
+    PA.preserve<MemorySSAAnalysis>();
+
+    if (VerifyMemorySSA)
+      MSSA->verifyMemorySSA();
+  }
+
   return PA;
 }
 
@@ -358,19 +425,46 @@ struct LegacyLoopSinkPass : public LoopPass {
     if (skipLoop(L))
       return false;
 
+    BasicBlock *Preheader = L->getLoopPreheader();
+    if (!Preheader)
+      return false;
+
+    // Enable LoopSink only when runtime profile is available.
+    // With static profile, the sinking decision may be sub-optimal.
+    if (!Preheader->getParent()->hasProfileData())
+      return false;
+
+    AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
-    return sinkLoopInvariantInstructions(
-        *L, getAnalysis<AAResultsWrapperPass>().getAAResults(),
-        getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
+    std::unique_ptr<AliasSetTracker> CurAST;
+    MemorySSA *MSSA = nullptr;
+    if (EnableMSSAInLegacyLoopSink)
+      MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+    else {
+      CurAST = std::make_unique<AliasSetTracker>(AA);
+      computeAliasSet(*L, *Preheader, *CurAST.get());
+    }
+
+    bool Changed = sinkLoopInvariantInstructions(
+        *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
         getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
-        SE ? &SE->getSE() : nullptr);
+        SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA);
+
+    if (MSSA && VerifyMemorySSA)
+      MSSA->verifyMemorySSA();
+
+    return Changed;
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
     getLoopAnalysisUsage(AU);
+    if (EnableMSSAInLegacyLoopSink) {
+      AU.addRequired<MemorySSAWrapperPass>();
+      AU.addPreserved<MemorySSAWrapperPass>();
+    }
   }
 };
 }
@@ -380,6 +474,7 @@ INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(LoopPass)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false)
 
 Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); }

diff  --git a/llvm/test/Transforms/LICM/loopsink-pr38462.ll b/llvm/test/Transforms/LICM/loopsink-pr38462.ll
index 146e2506b7eb..3dd6397bf5ba 100644
--- a/llvm/test/Transforms/LICM/loopsink-pr38462.ll
+++ b/llvm/test/Transforms/LICM/loopsink-pr38462.ll
@@ -1,4 +1,7 @@
 ; RUN: opt -S -loop-sink < %s | FileCheck %s
+; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
+; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
+; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
 
 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc19.13.26128"

diff  --git a/llvm/test/Transforms/LICM/loopsink-pr39570.ll b/llvm/test/Transforms/LICM/loopsink-pr39570.ll
index 65d3e1f51395..893331fc7ce4 100644
--- a/llvm/test/Transforms/LICM/loopsink-pr39570.ll
+++ b/llvm/test/Transforms/LICM/loopsink-pr39570.ll
@@ -1,4 +1,7 @@
 ; RUN: opt -S -loop-sink < %s | FileCheck %s
+; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
+; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
+; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
 
 ; CHECK: pr39570
 ; Make sure not to assert.

diff  --git a/llvm/test/Transforms/LICM/loopsink-pr39695.ll b/llvm/test/Transforms/LICM/loopsink-pr39695.ll
index 4d33210352de..71c36dc2259d 100644
--- a/llvm/test/Transforms/LICM/loopsink-pr39695.ll
+++ b/llvm/test/Transforms/LICM/loopsink-pr39695.ll
@@ -1,4 +1,7 @@
 ; RUN: opt -S -loop-sink < %s | FileCheck %s
+; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
+; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
+; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
 
 ; The load instruction should not be sunk into following loop.
 ; CHECK:      @foo

diff  --git a/llvm/test/Transforms/LICM/loopsink.ll b/llvm/test/Transforms/LICM/loopsink.ll
index f9bdbae06cd4..7982fd2957f2 100644
--- a/llvm/test/Transforms/LICM/loopsink.ll
+++ b/llvm/test/Transforms/LICM/loopsink.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -S -loop-sink < %s | FileCheck %s
+; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
 ; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
+; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
 
 @g = global i32 0, align 4
 


        


More information about the llvm-commits mailing list