[llvm] 1cfa986 - [SLP] Optionally preserve MemorySSA

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 15 16:37:55 PDT 2022


Author: Philip Reames
Date: 2022-03-15T16:36:15-07:00
New Revision: 1cfa986d68e2f04854ef30c432b8aa28e13a9706

URL: https://github.com/llvm/llvm-project/commit/1cfa986d68e2f04854ef30c432b8aa28e13a9706
DIFF: https://github.com/llvm/llvm-project/commit/1cfa986d68e2f04854ef30c432b8aa28e13a9706.diff

LOG: [SLP] Optionally preserve MemorySSA

This initial patch adds code to preserve MemorySSA through a run of the SLP vectorizer. The eventual plan is to use MemorySSA to accelerate SLP's memory dependence checking, but we're still a long way from that. In particular, this patch is correct, but really slow. It's being landed so that we can work incrementally in tree, not because it's expected to be useful to anyone just yet.

The broader effort is being tracked in https://github.com/llvm/llvm-project/issues/54256. It's worth noting explicitly that this may not work out, and if not, we will be reverting all of the MSSA support in SLP at some point in the next few weeks.

Differential Revision: https://reviews.llvm.org/D117926

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 1792b241bd92a..e79b4174c1745 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -37,6 +37,7 @@ class InsertElementInst;
 class InsertValueInst;
 class Instruction;
 class LoopInfo;
+class MemorySSA;
 class OptimizationRemarkEmitter;
 class PHINode;
 class ScalarEvolution;
@@ -67,6 +68,7 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
   DominatorTree *DT = nullptr;
   AssumptionCache *AC = nullptr;
   DemandedBits *DB = nullptr;
+  MemorySSA *MSSA = nullptr; // nullable, currently preserved, but not used
   const DataLayout *DL = nullptr;
 
 public:
@@ -76,7 +78,7 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
   bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_,
                TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_,
                DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_,
-               OptimizationRemarkEmitter *ORE_);
+               MemorySSA *MSSA_, OptimizationRemarkEmitter *ORE_);
 
 private:
   /// Collect store and getelementptr instructions and organize them

diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5bce8bed9930b..90114f934fcaa 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -41,6 +41,8 @@
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -166,6 +168,10 @@ static cl::opt<bool>
     ViewSLPTree("view-slp-tree", cl::Hidden,
                 cl::desc("Display the SLP trees with Graphviz"));
 
+static cl::opt<bool> EnableMSSAInSLPVectorizer(
+    "enable-mssa-in-slp-vectorizer", cl::Hidden, cl::init(false),
+    cl::desc("Enable MemorySSA for SLPVectorizer in new pass manager"));
+
 // Limit the number of alias checks. The limit is chosen so that
 // it has no negative effect on the llvm benchmarks.
 static const unsigned AliasedCheckLimit = 10;
@@ -789,9 +795,10 @@ class BoUpSLP {
   BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
           TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
           DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
-          const DataLayout *DL, OptimizationRemarkEmitter *ORE)
+          MemorySSA *MSSA, const DataLayout *DL, OptimizationRemarkEmitter *ORE)
       : BatchAA(*Aa), F(Func), SE(Se), TTI(Tti), TLI(TLi), LI(Li),
-        DT(Dt), AC(AC), DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) {
+        DT(Dt), AC(AC), DB(DB), MSSA(MSSA), DL(DL), ORE(ORE),
+        Builder(Se->getContext()) {
     CodeMetrics::collectEphemeralValues(F, AC, EphValues);
     // Use the vector register size specified by the target unless overridden
     // by a command-line option.
@@ -2979,6 +2986,7 @@ class BoUpSLP {
   DominatorTree *DT;
   AssumptionCache *AC;
   DemandedBits *DB;
+  MemorySSA *MSSA;
   const DataLayout *DL;
   OptimizationRemarkEmitter *ORE;
 
@@ -3091,6 +3099,13 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
 } // end namespace llvm
 
 BoUpSLP::~BoUpSLP() {
+  if (MSSA) {
+    MemorySSAUpdater MSSAU(MSSA);
+    for (const auto &Pair : DeletedInstructions) {
+      if (auto *Access = MSSA->getMemoryAccess(Pair.first))
+        MSSAU.removeMemoryAccess(Access);
+    }
+  }
   for (const auto &Pair : DeletedInstructions) {
     // Replace operands of ignored instructions with Undefs in case if they were
     // marked for deletion.
@@ -6786,6 +6801,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       auto *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
       Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
       LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
+      if (MSSA) {
+        MemorySSAUpdater MSSAU(MSSA);
+        auto *Access = MSSA->getMemoryAccess(LI);
+        assert(Access);
+        MemoryUseOrDef *NewAccess =
+          MSSAU.createMemoryAccessBefore(V, Access->getDefiningAccess(),
+                                         Access);
+        MSSAU.insertUse(cast<MemoryUse>(NewAccess), true);
+      }
       Value *NewV = propagateMetadata(V, E->Scalars);
       ShuffleBuilder.addInversedMask(E->ReorderIndices);
       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
@@ -7035,6 +7059,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
               commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
         NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment);
       }
+
+      if (MSSA) {
+        MemorySSAUpdater MSSAU(MSSA);
+        auto *Access = MSSA->getMemoryAccess(LI);
+        assert(Access);
+        MemoryUseOrDef *NewAccess =
+          MSSAU.createMemoryAccessAfter(NewLI, Access->getDefiningAccess(),
+                                        Access);
+        MSSAU.insertUse(cast<MemoryUse>(NewAccess), true);
+      }
+
       Value *V = propagateMetadata(NewLI, E->Scalars);
 
       ShuffleBuilder.addInversedMask(E->ReorderIndices);
@@ -7060,6 +7095,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       StoreInst *ST =
           Builder.CreateAlignedStore(VecValue, VecPtr, SI->getAlign());
 
+      if (MSSA) {
+        MemorySSAUpdater MSSAU(MSSA);
+        auto *Access = MSSA->getMemoryAccess(SI);
+        assert(Access);
+        MemoryUseOrDef *NewAccess =
+          MSSAU.createMemoryAccessAfter(ST, Access->getDefiningAccess(),
+                                        Access);
+        MSSAU.insertDef(cast<MemoryDef>(NewAccess), true);
+      }
+
       // The pointer operand uses an in-tree scalar, so add the new BitCast or
       // StoreInst to ExternalUses to make sure that an extract will be
       // generated in the future.
@@ -8024,6 +8069,15 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
   BS->initialFillReadyList(ReadyInsts);
 
   Instruction *LastScheduledInst = BS->ScheduleEnd;
+  MemoryAccess *MemInsertPt = nullptr;
+  if (MSSA) {
+    for (auto I = LastScheduledInst->getIterator(); I != BS->BB->end(); I++) {
+      if (auto *Access = MSSA->getMemoryAccess(&*I)) {
+        MemInsertPt = Access;
+        break;
+      }
+    }
+  }
 
   // Do the "real" scheduling.
   while (!ReadyInsts.empty()) {
@@ -8035,9 +8089,24 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
     for (ScheduleData *BundleMember = picked; BundleMember;
          BundleMember = BundleMember->NextInBundle) {
       Instruction *pickedInst = BundleMember->Inst;
-      if (pickedInst->getNextNode() != LastScheduledInst)
+      if (pickedInst->getNextNode() != LastScheduledInst) {
         pickedInst->moveBefore(LastScheduledInst);
+        if (MSSA) {
+          MemorySSAUpdater MSSAU(MSSA);
+          if (auto *Access = MSSA->getMemoryAccess(pickedInst)) {
+            if (MemInsertPt)
+              MSSAU.moveBefore(Access, cast<MemoryUseOrDef>(MemInsertPt));
+            else
+              MSSAU.moveToPlace(Access, BS->BB,
+                                MemorySSA::InsertionPlace::End);
+          }
+        }
+      }
+
       LastScheduledInst = pickedInst;
+      if (MSSA)
+        if (auto *Access = MSSA->getMemoryAccess(LastScheduledInst))
+          MemInsertPt = Access;
     }
 
     BS->schedule(picked, ReadyInsts);
@@ -8383,7 +8452,7 @@ struct SLPVectorizer : public FunctionPass {
     auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
 
-    return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
+    return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, /*MSSA*/nullptr, ORE);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -8417,13 +8486,21 @@ PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &A
   auto *AC = &AM.getResult<AssumptionAnalysis>(F);
   auto *DB = &AM.getResult<DemandedBitsAnalysis>(F);
   auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  auto *MSSA = EnableMSSAInSLPVectorizer ?
+    &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : (MemorySSA*)nullptr;
 
-  bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
+  bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, MSSA, ORE);
   if (!Changed)
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
+  if (MSSA) {
+#ifdef EXPENSIVE_CHECKS
+    MSSA->verifyMemorySSA();
+#endif
+    PA.preserve<MemorySSAAnalysis>();
+  }
   return PA;
 }
 
@@ -8432,6 +8509,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
                                 TargetLibraryInfo *TLI_, AAResults *AA_,
                                 LoopInfo *LI_, DominatorTree *DT_,
                                 AssumptionCache *AC_, DemandedBits *DB_,
+                                MemorySSA *MSSA,
                                 OptimizationRemarkEmitter *ORE_) {
   if (!RunSLPVectorization)
     return false;
@@ -8465,7 +8543,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 
   // Use the bottom up slp vectorizer to construct chains that start with
   // store instructions.
-  BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL, ORE_);
+  BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, MSSA, DL, ORE_);
 
   // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
   // delete instructions.


        


More information about the llvm-commits mailing list