[polly] deb00cf - [Polly][NewPM] Port Simplify to the new pass manager

Sun Sep 20 19:18:22 PDT 2020

Author: Pengxuan Zheng
Date: 2020-09-20T19:18:01-07:00
New Revision: deb00cf0b5abd45e160f1e0ff3e3dcce97b071e5

URL: https://github.com/llvm/llvm-project/commit/deb00cf0b5abd45e160f1e0ff3e3dcce97b071e5
DIFF: https://github.com/llvm/llvm-project/commit/deb00cf0b5abd45e160f1e0ff3e3dcce97b071e5.diff

LOG: [Polly][NewPM] Port Simplify to the new pass manager

Reviewed By: Meinersbur

Differential Revision: https://reviews.llvm.org/D87328

Added: 
    

Modified: 
    polly/include/polly/Simplify.h
    polly/lib/Support/PollyPasses.def
    polly/lib/Support/RegisterPasses.cpp
    polly/lib/Transform/Simplify.cpp
    polly/test/Simplify/dead_access_load.ll
    polly/test/Simplify/dead_access_phi.ll
    polly/test/Simplify/dead_access_value.ll
    polly/test/Simplify/dead_instruction.ll
    polly/test/Simplify/notdead_region_exitphi.ll
    polly/test/Simplify/notdead_region_innerphi.ll
    polly/test/Simplify/notredundant_region_middle.ll
    polly/test/Simplify/notredundant_synthesizable_unknownit.ll
    polly/test/Simplify/overwritten.ll
    polly/test/Simplify/overwritten_3store.ll
    polly/test/Simplify/overwritten_loadbetween.ll
    polly/test/Simplify/pass_existence.ll
    polly/test/Simplify/phi_in_regionstmt.ll
    polly/test/Simplify/redundant.ll
    polly/test/Simplify/redundant_differentindex.ll
    polly/test/Simplify/redundant_storebetween.ll

Removed: 
    


################################################################################
diff  --git a/polly/include/polly/Simplify.h b/polly/include/polly/Simplify.h
index fb6a0152cdc0..aa8c8a2cd2e3 100644

--- a/polly/include/polly/Simplify.h
+++ b/polly/include/polly/Simplify.h
@@ -13,13 +13,109 @@
 #ifndef POLLY_TRANSFORM_SIMPLIFY_H
 #define POLLY_TRANSFORM_SIMPLIFY_H
 
+#include "polly/ScopPass.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/PassManager.h"
 
 namespace llvm {
 class PassRegistry;
 class Pass;
 } // namespace llvm
 
+namespace {
+class SimplifyVisitor {
+private:
+  /// The invocation id (if there are multiple instances in the pass manager's
+  /// pipeline) to determine which statistics to update.
+  int CallNo;
+
+  /// The last/current SCoP that is/has been processed.
+  Scop *S;
+
+  /// Number of statements with empty domains removed from the SCoP.
+  int EmptyDomainsRemoved = 0;
+
+  /// Number of writes that are overwritten anyway.
+  int OverwritesRemoved = 0;
+
+  /// Number of combined writes.
+  int WritesCoalesced = 0;
+
+  /// Number of redundant writes removed from this SCoP.
+  int RedundantWritesRemoved = 0;
+
+  /// Number of writes with empty access domain removed.
+  int EmptyPartialAccessesRemoved = 0;
+
+  /// Number of unused accesses removed from this SCoP.
+  int DeadAccessesRemoved = 0;
+
+  /// Number of unused instructions removed from this SCoP.
+  int DeadInstructionsRemoved = 0;
+
+  /// Number of unnecessary statements removed from the SCoP.
+  int StmtsRemoved = 0;
+
+  /// Return whether at least one simplification has been applied.
+  bool isModified() const;
+
+  /// Remove statements that are never executed due to their domains being
+  /// empty.
+  ///
+  /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's
+  /// effective domain, i.e. including the SCoP's context as used by some other
+  /// simplification methods in this pass. This is necessary because the
+  /// analysis on empty domains is unreliable, e.g. remove a scalar value
+  /// definition MemoryAccesses, but not its use.
+  void removeEmptyDomainStmts();
+
+  /// Remove writes that are overwritten unconditionally later in the same
+  /// statement.
+  ///
+  /// There must be no read of the same value between the write (that is to be
+  /// removed) and the overwrite.
+  void removeOverwrites();
+
+  /// Combine writes that write the same value if possible.
+  ///
+  /// This function is able to combine:
+  /// - Partial writes with disjoint domain.
+  /// - Writes that write to the same array element.
+  ///
+  /// In all cases, both writes must write the same values.
+  void coalesceWrites();
+
+  /// Remove writes that just write the same value already stored in the
+  /// element.
+  void removeRedundantWrites();
+
+  /// Remove statements without side effects.
+  void removeUnnecessaryStmts();
+
+  /// Remove accesses that have an empty domain.
+  void removeEmptyPartialAccesses();
+
+  /// Mark all reachable instructions and accesses, and sweep those that are not
+  /// reachable.
+  void markAndSweep(LoopInfo *LI);
+
+  /// Print simplification statistics to @p OS.
+  void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const;
+
+  /// Print the current state of all MemoryAccesses to @p OS.
+  void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const;
+
+public:
+  explicit SimplifyVisitor(int CallNo = 0) : CallNo(CallNo) {}
+
+  bool visit(Scop &S, LoopInfo *LI);
+
+  void printScop(raw_ostream &OS, Scop &S) const;
+
+  void releaseMemory();
+};
+} // namespace
+
 namespace polly {
 
 class MemoryAccess;
@@ -50,10 +146,29 @@ llvm::SmallVector<MemoryAccess *, 32> getAccessesInOrder(ScopStmt &Stmt);
 ///
 /// @return The Simplify pass.
 llvm::Pass *createSimplifyPass(int CallNo = 0);
+
+struct SimplifyPass : public PassInfoMixin<SimplifyPass> {
+  SimplifyPass(int CallNo = 0) : Imp(CallNo) {}
+
+  llvm::PreservedAnalyses run(Scop &S, ScopAnalysisManager &SAM,
+                              ScopStandardAnalysisResults &AR, SPMUpdater &U);
+
+  SimplifyVisitor Imp;
+};
+
+struct SimplifyPrinterPass : public PassInfoMixin<SimplifyPrinterPass> {
+  SimplifyPrinterPass(raw_ostream &OS, int CallNo = 0) : OS(OS), Imp(CallNo) {}
+
+  PreservedAnalyses run(Scop &S, ScopAnalysisManager &,
+                        ScopStandardAnalysisResults &, SPMUpdater &);
+
+  raw_ostream &OS;
+  SimplifyVisitor Imp;
+};
 } // namespace polly
 
 namespace llvm {
-void initializeSimplifyPass(llvm::PassRegistry &);
+void initializeSimplifyLegacyPassPass(llvm::PassRegistry &);
 } // namespace llvm
 
 #endif /* POLLY_TRANSFORM_SIMPLIFY_H */

diff  --git a/polly/lib/Support/PollyPasses.def b/polly/lib/Support/PollyPasses.def
index b07f928639c1..2c0592af28cc 100644
--- a/polly/lib/Support/PollyPasses.def
+++ b/polly/lib/Support/PollyPasses.def
@@ -28,4 +28,6 @@ SCOP_PASS("polly-import-jscop", JSONImportPass())
 SCOP_PASS("print<polly-ast>", IslAstPrinterPass(outs()))
 SCOP_PASS("print<polly-dependences>", DependenceInfoPrinterPass(outs()))
 SCOP_PASS("polly-codegen", CodeGenerationPass())
+SCOP_PASS("polly-simplify", SimplifyPass())
+SCOP_PASS("print<polly-simplify>", SimplifyPrinterPass(outs()))
 #undef SCOP_PASS

diff  --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp
index 4ceca070b37f..ad6edb5807cc 100644
--- a/polly/lib/Support/RegisterPasses.cpp
+++ b/polly/lib/Support/RegisterPasses.cpp
@@ -284,7 +284,7 @@ void initializePollyPasses(PassRegistry &Registry) {
   initializeFlattenSchedulePass(Registry);
   initializeForwardOpTreePass(Registry);
   initializeDeLICMPass(Registry);
-  initializeSimplifyPass(Registry);
+  initializeSimplifyLegacyPassPass(Registry);
   initializeDumpModulePass(Registry);
   initializePruneUnprofitablePass(Registry);
 }

diff  --git a/polly/lib/Transform/Simplify.cpp b/polly/lib/Transform/Simplify.cpp
index d699aa4f4990..bad17ce92c45 100644
--- a/polly/lib/Transform/Simplify.cpp
+++ b/polly/lib/Transform/Simplify.cpp
@@ -117,538 +117,573 @@ static isl::union_map underapproximatedAddMap(isl::union_map UMap,
   return UResult;
 }
 
-class Simplify : public ScopPass {
-private:
-  /// The invocation id (if there are multiple instances in the pass manager's
-  /// pipeline) to determine which statistics to update.
-  int CallNo;
-
-  /// The last/current SCoP that is/has been processed.
-  Scop *S;
+/// Return whether at least one simplification has been applied.
+bool SimplifyVisitor::isModified() const {
+  return EmptyDomainsRemoved > 0 || OverwritesRemoved > 0 ||
+         WritesCoalesced > 0 || RedundantWritesRemoved > 0 ||
+         EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 ||
+         DeadInstructionsRemoved > 0 || StmtsRemoved > 0;
+}
 
-  /// Number of statements with empty domains removed from the SCoP.
-  int EmptyDomainsRemoved = 0;
+/// Remove statements that are never executed due to their domains being
+/// empty.
+///
+/// In contrast to Scop::simplifySCoP, this removes based on the SCoP's
+/// effective domain, i.e. including the SCoP's context as used by some other
+/// simplification methods in this pass. This is necessary because the
+/// analysis on empty domains is unreliable, e.g. remove a scalar value
+/// definition MemoryAccesses, but not its use.
+void SimplifyVisitor::removeEmptyDomainStmts() {
+  size_t NumStmtsBefore = S->getSize();
+
+  S->removeStmts([](ScopStmt &Stmt) -> bool {
+    auto EffectiveDomain =
+        Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
+    return EffectiveDomain.is_empty();
+  });
+
+  assert(NumStmtsBefore >= S->getSize());
+  EmptyDomainsRemoved = NumStmtsBefore - S->getSize();
+  LLVM_DEBUG(dbgs() << "Removed " << EmptyDomainsRemoved << " (of "
+                    << NumStmtsBefore << ") statements with empty domains \n");
+  TotalEmptyDomainsRemoved[CallNo] += EmptyDomainsRemoved;
+}
 
-  /// Number of writes that are overwritten anyway.
-  int OverwritesRemoved = 0;
+/// Remove writes that are overwritten unconditionally later in the same
+/// statement.
+///
+/// There must be no read of the same value between the write (that is to be
+/// removed) and the overwrite.
+void SimplifyVisitor::removeOverwrites() {
+  for (auto &Stmt : *S) {
+    isl::set Domain = Stmt.getDomain();
+    isl::union_map WillBeOverwritten =
+        isl::union_map::empty(S->getParamSpace());
+
+    SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
+
+    // Iterate in reverse order, so the overwrite comes before the write that
+    // is to be removed.
+    for (auto *MA : reverse(Accesses)) {
+
+      // In region statements, the explicit accesses can be in blocks that can
+      // be executed in any order. We therefore process only the implicit
+      // writes and stop after that.
+      if (Stmt.isRegionStmt() && isExplicitAccess(MA))
+        break;
+
+      auto AccRel = MA->getAccessRelation();
+      AccRel = AccRel.intersect_domain(Domain);
+      AccRel = AccRel.intersect_params(S->getContext());
+
+      // If a value is read in-between, do not consider it as overwritten.
+      if (MA->isRead()) {
+        // Invalidate all overwrites for the array it accesses to avoid too
+        // complex isl sets.
+        isl::map AccRelUniv = isl::map::universe(AccRel.get_space());
+        WillBeOverwritten = WillBeOverwritten.subtract(AccRelUniv);
+        continue;
+      }
 
-  /// Number of combined writes.
-  int WritesCoalesced = 0;
+      // If all of a write's elements are overwritten, remove it.
+      isl::union_map AccRelUnion = AccRel;
+      if (AccRelUnion.is_subset(WillBeOverwritten)) {
+        LLVM_DEBUG(dbgs() << "Removing " << MA
+                          << " which will be overwritten anyway\n");
 
-  /// Number of redundant writes removed from this SCoP.
-  int RedundantWritesRemoved = 0;
+        Stmt.removeSingleMemoryAccess(MA);
+        OverwritesRemoved++;
+        TotalOverwritesRemoved[CallNo]++;
+      }
 
-  /// Number of writes with empty access domain removed.
-  int EmptyPartialAccessesRemoved = 0;
+      // Unconditional writes overwrite other values.
+      if (MA->isMustWrite()) {
+        // Avoid too complex isl sets. If necessary, throw away some of the
+        // knowledge.
+        WillBeOverwritten = underapproximatedAddMap(WillBeOverwritten, AccRel);
+      }
+    }
+  }
+}
 
-  /// Number of unused accesses removed from this SCoP.
-  int DeadAccessesRemoved = 0;
+/// Combine writes that write the same value if possible.
+///
+/// This function is able to combine:
+/// - Partial writes with disjoint domain.
+/// - Writes that write to the same array element.
+///
+/// In all cases, both writes must write the same values.
+void SimplifyVisitor::coalesceWrites() {
+  for (auto &Stmt : *S) {
+    isl::set Domain = Stmt.getDomain().intersect_params(S->getContext());
+
+    // We let isl do the lookup for the same-value condition. For this, we
+    // wrap llvm::Value into an isl::set such that isl can do the lookup in
+    // its hashtable implementation. llvm::Values are only compared within a
+    // ScopStmt, so the map can be local to this scope. TODO: Refactor with
+    // ZoneAlgorithm::makeValueSet()
+    SmallDenseMap<Value *, isl::set> ValueSets;
+    auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
+      assert(V);
+      isl::set &Result = ValueSets[V];
+      if (Result.is_null()) {
+        isl::ctx Ctx = S->getIslCtx();
+        std::string Name = getIslCompatibleName(
+            "Val", V, ValueSets.size() - 1, std::string(), UseInstructionNames);
+        isl::id Id = isl::id::alloc(Ctx, Name, V);
+        Result = isl::set::universe(
+            isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
+      }
+      return Result;
+    };
+
+    // List of all eligible (for coalescing) writes of the future.
+    // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
+    isl::union_map FutureWrites = isl::union_map::empty(S->getParamSpace());
+
+    // Iterate over accesses from the last to the first.
+    SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
+    for (MemoryAccess *MA : reverse(Accesses)) {
+      // In region statements, the explicit accesses can be in blocks that can
+      // be executed in any order. We therefore process only the implicit
+      // writes and stop after that.
+      if (Stmt.isRegionStmt() && isExplicitAccess(MA))
+        break;
+
+      // { Domain[] -> Element[] }
+      isl::map AccRel = MA->getLatestAccessRelation().intersect_domain(Domain);
+
+      // { [Domain[] -> Element[]] }
+      isl::set AccRelWrapped = AccRel.wrap();
+
+      // { Value[] }
+      isl::set ValSet;
+
+      if (MA->isMustWrite() && (MA->isOriginalScalarKind() ||
+                                isa<StoreInst>(MA->getAccessInstruction()))) {
+        // Normally, tryGetValueStored() should be used to determine which
+        // element is written, but it can return nullptr; For PHI accesses,
+        // getAccessValue() returns the PHI instead of the PHI's incoming
+        // value. In this case, where we only compare values of a single
+        // statement, this is fine, because within a statement, a PHI in a
+        // successor block has always the same value as the incoming write. We
+        // still preferably use the incoming value directly so we also catch
+        // direct uses of that.
+        Value *StoredVal = MA->tryGetValueStored();
+        if (!StoredVal)
+          StoredVal = MA->getAccessValue();
+        ValSet = makeValueSet(StoredVal);
+
+        // { Domain[] }
+        isl::set AccDomain = AccRel.domain();
+
+        // Parts of the statement's domain that is not written by this access.
+        isl::set UndefDomain = Domain.subtract(AccDomain);
+
+        // { Element[] }
+        isl::set ElementUniverse =
+            isl::set::universe(AccRel.get_space().range());
 
-  /// Number of unused instructions removed from this SCoP.
-  int DeadInstructionsRemoved = 0;
+        // { Domain[] -> Element[] }
+        isl::map UndefAnything =
+            isl::map::from_domain_and_range(UndefDomain, ElementUniverse);
+
+        // We are looking for a compatible write access. The other write can
+        // access these elements...
+        isl::map AllowedAccesses = AccRel.unite(UndefAnything);
+
+        // ... and must write the same value.
+        // { [Domain[] -> Element[]] -> Value[] }
+        isl::map Filter =
+            isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet);
+
+        // Lookup future write that fulfills these conditions.
+        // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] }
+        isl::union_map Filtered =
+            FutureWrites.uncurry().intersect_domain(Filter.wrap());
+
+        // Iterate through the candidates.
+        for (isl::map Map : Filtered.get_map_list()) {
+          MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space()
+                                      .get_tuple_id(isl::dim::out)
+                                      .get_user();
+
+          isl::map OtherAccRel =
+              OtherMA->getLatestAccessRelation().intersect_domain(Domain);
+
+          // The filter only guaranteed that some of OtherMA's accessed
+          // elements are allowed. Verify that it only accesses allowed
+          // elements. Otherwise, continue with the next candidate.
+          if (!OtherAccRel.is_subset(AllowedAccesses).is_true())
+            continue;
+
+          // The combined access relation.
+          // { Domain[] -> Element[] }
+          isl::map NewAccRel = AccRel.unite(OtherAccRel);
+          simplify(NewAccRel);
 
-  /// Number of unnecessary statements removed from the SCoP.
-  int StmtsRemoved = 0;
+          // Carry out the coalescing.
+          Stmt.removeSingleMemoryAccess(MA);
+          OtherMA->setNewAccessRelation(NewAccRel);
 
-  /// Return whether at least one simplification has been applied.
-  bool isModified() const {
-    return EmptyDomainsRemoved > 0 || OverwritesRemoved > 0 ||
-           WritesCoalesced > 0 || RedundantWritesRemoved > 0 ||
-           EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 ||
-           DeadInstructionsRemoved > 0 || StmtsRemoved > 0;
-  }
+          // We removed MA, OtherMA takes its role.
+          MA = OtherMA;
 
-  /// Remove statements that are never executed due to their domains being
-  /// empty.
-  ///
-  /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's
-  /// effective domain, i.e. including the SCoP's context as used by some other
-  /// simplification methods in this pass. This is necessary because the
-  /// analysis on empty domains is unreliable, e.g. remove a scalar value
-  /// definition MemoryAccesses, but not its use.
-  void removeEmptyDomainStmts() {
-    size_t NumStmtsBefore = S->getSize();
-
-    S->removeStmts([](ScopStmt &Stmt) -> bool {
-      auto EffectiveDomain =
-          Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
-      return EffectiveDomain.is_empty();
-    });
-
-    assert(NumStmtsBefore >= S->getSize());
-    EmptyDomainsRemoved = NumStmtsBefore - S->getSize();
-    LLVM_DEBUG(dbgs() << "Removed " << EmptyDomainsRemoved << " (of "
-                      << NumStmtsBefore
-                      << ") statements with empty domains \n");
-    TotalEmptyDomainsRemoved[CallNo] += EmptyDomainsRemoved;
-  }
+          TotalWritesCoalesced[CallNo]++;
+          WritesCoalesced++;
 
-  /// Remove writes that are overwritten unconditionally later in the same
-  /// statement.
-  ///
-  /// There must be no read of the same value between the write (that is to be
-  /// removed) and the overwrite.
-  void removeOverwrites() {
-    for (auto &Stmt : *S) {
-      isl::set Domain = Stmt.getDomain();
-      isl::union_map WillBeOverwritten =
-          isl::union_map::empty(S->getParamSpace());
-
-      SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
-
-      // Iterate in reverse order, so the overwrite comes before the write that
-      // is to be removed.
-      for (auto *MA : reverse(Accesses)) {
-
-        // In region statements, the explicit accesses can be in blocks that are
-        // can be executed in any order. We therefore process only the implicit
-        // writes and stop after that.
-        if (Stmt.isRegionStmt() && isExplicitAccess(MA))
+          // Don't look for more candidates.
           break;
-
-        auto AccRel = MA->getAccessRelation();
-        AccRel = AccRel.intersect_domain(Domain);
-        AccRel = AccRel.intersect_params(S->getContext());
-
-        // If a value is read in-between, do not consider it as overwritten.
-        if (MA->isRead()) {
-          // Invalidate all overwrites for the array it accesses to avoid too
-          // complex isl sets.
-          isl::map AccRelUniv = isl::map::universe(AccRel.get_space());
-          WillBeOverwritten = WillBeOverwritten.subtract(AccRelUniv);
-          continue;
         }
+      }
 
-        // If all of a write's elements are overwritten, remove it.
-        isl::union_map AccRelUnion = AccRel;
-        if (AccRelUnion.is_subset(WillBeOverwritten)) {
-          LLVM_DEBUG(dbgs() << "Removing " << MA
-                            << " which will be overwritten anyway\n");
+      // Two writes cannot be coalesced if there is another access (to some of
+      // the written elements) between them. Remove all visited write accesses
+      // from the list of eligible writes. Don't just remove the accessed
+      // elements, but any MemoryAccess that touches any of the invalidated
+      // elements.
+      SmallPtrSet<MemoryAccess *, 2> TouchedAccesses;
+      for (isl::map Map :
+           FutureWrites.intersect_domain(AccRelWrapped).get_map_list()) {
+        MemoryAccess *MA = (MemoryAccess *)Map.get_space()
+                               .range()
+                               .unwrap()
+                               .get_tuple_id(isl::dim::out)
+                               .get_user();
+        TouchedAccesses.insert(MA);
+      }
+      isl::union_map NewFutureWrites =
+          isl::union_map::empty(FutureWrites.get_space());
+      for (isl::map FutureWrite : FutureWrites.get_map_list()) {
+        MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space()
+                               .range()
+                               .unwrap()
+                               .get_tuple_id(isl::dim::out)
+                               .get_user();
+        if (!TouchedAccesses.count(MA))
+          NewFutureWrites = NewFutureWrites.add_map(FutureWrite);
+      }
+      FutureWrites = NewFutureWrites;
 
-          Stmt.removeSingleMemoryAccess(MA);
-          OverwritesRemoved++;
-          TotalOverwritesRemoved[CallNo]++;
-        }
+      if (MA->isMustWrite() && !ValSet.is_null()) {
+        // { MemoryAccess[] }
+        auto AccSet =
+            isl::set::universe(isl::space(S->getIslCtx(), 0, 0)
+                                   .set_tuple_id(isl::dim::set, MA->getId()));
 
-        // Unconditional writes overwrite other values.
-        if (MA->isMustWrite()) {
-          // Avoid too complex isl sets. If necessary, throw away some of the
-          // knowledge.
-          WillBeOverwritten =
-              underapproximatedAddMap(WillBeOverwritten, AccRel);
-        }
+        // { Val[] -> MemoryAccess[] }
+        isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet);
+
+        // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
+        isl::map AccRelValAcc =
+            isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap());
+        FutureWrites = FutureWrites.add_map(AccRelValAcc);
       }
     }
   }
+}
 
-  /// Combine writes that write the same value if possible.
-  ///
-  /// This function is able to combine:
-  /// - Partial writes with disjoint domain.
-  /// - Writes that write to the same array element.
-  ///
-  /// In all cases, both writes must write the same values.
-  void coalesceWrites() {
-    for (auto &Stmt : *S) {
-      isl::set Domain = Stmt.getDomain().intersect_params(S->getContext());
-
-      // We let isl do the lookup for the same-value condition. For this, we
-      // wrap llvm::Value into an isl::set such that isl can do the lookup in
-      // its hashtable implementation. llvm::Values are only compared within a
-      // ScopStmt, so the map can be local to this scope. TODO: Refactor with
-      // ZoneAlgorithm::makeValueSet()
-      SmallDenseMap<Value *, isl::set> ValueSets;
-      auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
-        assert(V);
-        isl::set &Result = ValueSets[V];
-        if (Result.is_null()) {
-          isl::ctx Ctx = S->getIslCtx();
-          std::string Name =
-              getIslCompatibleName("Val", V, ValueSets.size() - 1,
-                                   std::string(), UseInstructionNames);
-          isl::id Id = isl::id::alloc(Ctx, Name, V);
-          Result = isl::set::universe(
-              isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
-        }
-        return Result;
-      };
-
-      // List of all eligible (for coalescing) writes of the future.
-      // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
-      isl::union_map FutureWrites = isl::union_map::empty(S->getParamSpace());
-
-      // Iterate over accesses from the last to the first.
-      SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
-      for (MemoryAccess *MA : reverse(Accesses)) {
-        // In region statements, the explicit accesses can be in blocks that can
-        // be executed in any order. We therefore process only the implicit
-        // writes and stop after that.
-        if (Stmt.isRegionStmt() && isExplicitAccess(MA))
-          break;
-
-        // { Domain[] -> Element[] }
-        isl::map AccRel =
-            MA->getLatestAccessRelation().intersect_domain(Domain);
-
-        // { [Domain[] -> Element[]] }
-        isl::set AccRelWrapped = AccRel.wrap();
-
-        // { Value[] }
-        isl::set ValSet;
-
-        if (MA->isMustWrite() && (MA->isOriginalScalarKind() ||
-                                  isa<StoreInst>(MA->getAccessInstruction()))) {
-          // Normally, tryGetValueStored() should be used to determine which
-          // element is written, but it can return nullptr; For PHI accesses,
-          // getAccessValue() returns the PHI instead of the PHI's incoming
-          // value. In this case, where we only compare values of a single
-          // statement, this is fine, because within a statement, a PHI in a
-          // successor block has always the same value as the incoming write. We
-          // still preferably use the incoming value directly so we also catch
-          // direct uses of that.
-          Value *StoredVal = MA->tryGetValueStored();
-          if (!StoredVal)
-            StoredVal = MA->getAccessValue();
-          ValSet = makeValueSet(StoredVal);
-
-          // { Domain[] }
-          isl::set AccDomain = AccRel.domain();
-
-          // Parts of the statement's domain that is not written by this access.
-          isl::set UndefDomain = Domain.subtract(AccDomain);
-
-          // { Element[] }
-          isl::set ElementUniverse =
-              isl::set::universe(AccRel.get_space().range());
+/// Remove writes that just write the same value already stored in the
+/// element.
+void SimplifyVisitor::removeRedundantWrites() {
+  for (auto &Stmt : *S) {
+    SmallDenseMap<Value *, isl::set> ValueSets;
+    auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
+      assert(V);
+      isl::set &Result = ValueSets[V];
+      if (Result.is_null()) {
+        isl_ctx *Ctx = S->getIslCtx().get();
+        std::string Name = getIslCompatibleName(
+            "Val", V, ValueSets.size() - 1, std::string(), UseInstructionNames);
+        isl::id Id = isl::manage(isl_id_alloc(Ctx, Name.c_str(), V));
+        Result = isl::set::universe(
+            isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
+      }
+      return Result;
+    };
+
+    isl::set Domain = Stmt.getDomain();
+    Domain = Domain.intersect_params(S->getContext());
+
+    // List of element reads that still have the same value while iterating
+    // through the MemoryAccesses.
+    // { [Domain[] -> Element[]] -> Val[] }
+    isl::union_map Known = isl::union_map::empty(S->getParamSpace());
+
+    SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
+    for (MemoryAccess *MA : Accesses) {
+      // Is the memory access in a defined order relative to the other
+      // accesses? In region statements, only the first and the last accesses
+      // have defined order. Execution of those in the middle may depend on
+      // runtime conditions and therefore cannot be modified.
+      bool IsOrdered =
+          Stmt.isBlockStmt() || MA->isOriginalScalarKind() ||
+          (!S->getBoxedLoops().size() && MA->getAccessInstruction() &&
+           Stmt.getEntryBlock() == MA->getAccessInstruction()->getParent());
+
+      isl::map AccRel = MA->getAccessRelation();
+      AccRel = AccRel.intersect_domain(Domain);
+      isl::set AccRelWrapped = AccRel.wrap();
+
+      // Determine whether a write is redundant (stores only values that are
+      // already present in the written array elements) and remove it if this
+      // is the case.
+      if (IsOrdered && MA->isMustWrite() &&
+          (isa<StoreInst>(MA->getAccessInstruction()) ||
+           MA->isOriginalScalarKind())) {
+        Value *StoredVal = MA->tryGetValueStored();
+        if (!StoredVal)
+          StoredVal = MA->getAccessValue();
+
+        if (StoredVal) {
+          // Lookup in the set of known values.
+          isl::map AccRelStoredVal = isl::map::from_domain_and_range(
+              AccRelWrapped, makeValueSet(StoredVal));
+          if (isl::union_map(AccRelStoredVal).is_subset(Known)) {
+            LLVM_DEBUG(dbgs() << "Cleanup of " << MA << ":\n");
+            LLVM_DEBUG(dbgs() << "      Scalar: " << *StoredVal << "\n");
+            LLVM_DEBUG(dbgs() << "      AccRel: " << AccRel << "\n");
 
-          // { Domain[] -> Element[] }
-          isl::map UndefAnything =
-              isl::map::from_domain_and_range(UndefDomain, ElementUniverse);
-
-          // We are looking a compatible write access. The other write can
-          // access these elements...
-          isl::map AllowedAccesses = AccRel.unite(UndefAnything);
-
-          // ... and must write the same value.
-          // { [Domain[] -> Element[]] -> Value[] }
-          isl::map Filter =
-              isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet);
-
-          // Lookup future write that fulfills these conditions.
-          // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] }
-          isl::union_map Filtered =
-              FutureWrites.uncurry().intersect_domain(Filter.wrap());
-
-          // Iterate through the candidates.
-          for (isl::map Map : Filtered.get_map_list()) {
-            MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space()
-                                        .get_tuple_id(isl::dim::out)
-                                        .get_user();
-
-            isl::map OtherAccRel =
-                OtherMA->getLatestAccessRelation().intersect_domain(Domain);
-
-            // The filter only guaranteed that some of OtherMA's accessed
-            // elements are allowed. Verify that it only accesses allowed
-            // elements. Otherwise, continue with the next candidate.
-            if (!OtherAccRel.is_subset(AllowedAccesses).is_true())
-              continue;
-
-            // The combined access relation.
-            // { Domain[] -> Element[] }
-            isl::map NewAccRel = AccRel.unite(OtherAccRel);
-            simplify(NewAccRel);
-
-            // Carry out the coalescing.
             Stmt.removeSingleMemoryAccess(MA);
-            OtherMA->setNewAccessRelation(NewAccRel);
-
-            // We removed MA, OtherMA takes its role.
-            MA = OtherMA;
-
-            TotalWritesCoalesced[CallNo]++;
-            WritesCoalesced++;
 
-            // Don't look for more candidates.
-            break;
+            RedundantWritesRemoved++;
+            TotalRedundantWritesRemoved[CallNo]++;
           }
         }
+      }
 
-        // Two writes cannot be coalesced if there is another access (to some of
-        // the written elements) between them. Remove all visited write accesses
-        // from the list of eligible writes. Don't just remove the accessed
-        // elements, but any MemoryAccess that touches any of the invalidated
-        // elements.
-        SmallPtrSet<MemoryAccess *, 2> TouchedAccesses;
-        for (isl::map Map :
-             FutureWrites.intersect_domain(AccRelWrapped).get_map_list()) {
-          MemoryAccess *MA = (MemoryAccess *)Map.get_space()
-                                 .range()
-                                 .unwrap()
-                                 .get_tuple_id(isl::dim::out)
-                                 .get_user();
-          TouchedAccesses.insert(MA);
-        }
-        isl::union_map NewFutureWrites =
-            isl::union_map::empty(FutureWrites.get_space());
-        for (isl::map FutureWrite : FutureWrites.get_map_list()) {
-          MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space()
-                                 .range()
-                                 .unwrap()
-                                 .get_tuple_id(isl::dim::out)
-                                 .get_user();
-          if (!TouchedAccesses.count(MA))
-            NewFutureWrites = NewFutureWrites.add_map(FutureWrite);
-        }
-        FutureWrites = NewFutureWrites;
-
-        if (MA->isMustWrite() && !ValSet.is_null()) {
-          // { MemoryAccess[] }
-          auto AccSet =
-              isl::set::universe(isl::space(S->getIslCtx(), 0, 0)
-                                     .set_tuple_id(isl::dim::set, MA->getId()));
-
-          // { Val[] -> MemoryAccess[] }
-          isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet);
+      // Update the know values set.
+      if (MA->isRead()) {
+        // Loaded values are the currently known values of the array element
+        // it was loaded from.
+        Value *LoadedVal = MA->getAccessValue();
+        if (LoadedVal && IsOrdered) {
+          isl::map AccRelVal = isl::map::from_domain_and_range(
+              AccRelWrapped, makeValueSet(LoadedVal));
 
-          // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
-          isl::map AccRelValAcc =
-              isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap());
-          FutureWrites = FutureWrites.add_map(AccRelValAcc);
+          Known = Known.add_map(AccRelVal);
         }
+      } else if (MA->isWrite()) {
+        // Remove (possibly) overwritten values from the known elements set.
+        // We remove all elements of the accessed array to avoid too complex
+        // isl sets.
+        isl::set AccRelUniv = isl::set::universe(AccRelWrapped.get_space());
+        Known = Known.subtract_domain(AccRelUniv);
+
+        // At this point, we could add the written value of must-writes.
+        // However, writing same values is already handled by
+        // coalesceWrites().
       }
     }
   }
+}
 
-  /// Remove writes that just write the same value already stored in the
-  /// element.
-  void removeRedundantWrites() {
-    for (auto &Stmt : *S) {
-      SmallDenseMap<Value *, isl::set> ValueSets;
-      auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
-        assert(V);
-        isl::set &Result = ValueSets[V];
-        if (Result.is_null()) {
-          isl_ctx *Ctx = S->getIslCtx().get();
-          std::string Name =
-              getIslCompatibleName("Val", V, ValueSets.size() - 1,
-                                   std::string(), UseInstructionNames);
-          isl::id Id = isl::manage(isl_id_alloc(Ctx, Name.c_str(), V));
-          Result = isl::set::universe(
-              isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
-        }
-        return Result;
-      };
-
-      isl::set Domain = Stmt.getDomain();
-      Domain = Domain.intersect_params(S->getContext());
-
-      // List of element reads that still have the same value while iterating
-      // through the MemoryAccesses.
-      // { [Domain[] -> Element[]] -> Val[] }
-      isl::union_map Known = isl::union_map::empty(S->getParamSpace());
-
-      SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
-      for (MemoryAccess *MA : Accesses) {
-        // Is the memory access in a defined order relative to the other
-        // accesses? In region statements, only the first and the last accesses
-        // have defined order. Execution of those in the middle may depend on
-        // runtime conditions an therefore cannot be modified.
-        bool IsOrdered =
-            Stmt.isBlockStmt() || MA->isOriginalScalarKind() ||
-            (!S->getBoxedLoops().size() && MA->getAccessInstruction() &&
-             Stmt.getEntryBlock() == MA->getAccessInstruction()->getParent());
-
-        isl::map AccRel = MA->getAccessRelation();
-        AccRel = AccRel.intersect_domain(Domain);
-        isl::set AccRelWrapped = AccRel.wrap();
-
-        // Determine whether a write is redundant (stores only values that are
-        // already present in the written array elements) and remove it if this
-        // is the case.
-        if (IsOrdered && MA->isMustWrite() &&
-            (isa<StoreInst>(MA->getAccessInstruction()) ||
-             MA->isOriginalScalarKind())) {
-          Value *StoredVal = MA->tryGetValueStored();
-          if (!StoredVal)
-            StoredVal = MA->getAccessValue();
-
-          if (StoredVal) {
-            // Lookup in the set of known values.
-            isl::map AccRelStoredVal = isl::map::from_domain_and_range(
-                AccRelWrapped, makeValueSet(StoredVal));
-            if (isl::union_map(AccRelStoredVal).is_subset(Known)) {
-              LLVM_DEBUG(dbgs() << "Cleanup of " << MA << ":\n");
-              LLVM_DEBUG(dbgs() << "      Scalar: " << *StoredVal << "\n");
-              LLVM_DEBUG(dbgs() << "      AccRel: " << AccRel << "\n");
-
-              Stmt.removeSingleMemoryAccess(MA);
-
-              RedundantWritesRemoved++;
-              TotalRedundantWritesRemoved[CallNo]++;
-            }
-          }
-        }
+/// Remove statements without side effects.
+void SimplifyVisitor::removeUnnecessaryStmts() {
+  auto NumStmtsBefore = S->getSize();
+  S->simplifySCoP(true);
+  assert(NumStmtsBefore >= S->getSize());
+  StmtsRemoved = NumStmtsBefore - S->getSize();
+  LLVM_DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore
+                    << ") statements\n");
+  TotalStmtsRemoved[CallNo] += StmtsRemoved;
+}
 
-        // Update the know values set.
-        if (MA->isRead()) {
-          // Loaded values are the currently known values of the array element
-          // it was loaded from.
-          Value *LoadedVal = MA->getAccessValue();
-          if (LoadedVal && IsOrdered) {
-            isl::map AccRelVal = isl::map::from_domain_and_range(
-                AccRelWrapped, makeValueSet(LoadedVal));
+/// Remove accesses that have an empty domain.
+void SimplifyVisitor::removeEmptyPartialAccesses() {
+  for (ScopStmt &Stmt : *S) {
+    // Defer the actual removal to not invalidate iterators.
+    SmallVector<MemoryAccess *, 8> DeferredRemove;
 
-            Known = Known.add_map(AccRelVal);
-          }
-        } else if (MA->isWrite()) {
-          // Remove (possibly) overwritten values from the known elements set.
-          // We remove all elements of the accessed array to avoid too complex
-          // isl sets.
-          isl::set AccRelUniv = isl::set::universe(AccRelWrapped.get_space());
-          Known = Known.subtract_domain(AccRelUniv);
-
-          // At this point, we could add the written value of must-writes.
-          // However, writing same values is already handled by
-          // coalesceWrites().
-        }
-      }
+    for (MemoryAccess *MA : Stmt) {
+      if (!MA->isWrite())
+        continue;
+
+      isl::map AccRel = MA->getAccessRelation();
+      if (!AccRel.is_empty().is_true())
+        continue;
+
+      LLVM_DEBUG(
+          dbgs() << "Removing " << MA
+                 << " because it's a partial access that never occurs\n");
+      DeferredRemove.push_back(MA);
+    }
+
+    for (MemoryAccess *MA : DeferredRemove) {
+      Stmt.removeSingleMemoryAccess(MA);
+      EmptyPartialAccessesRemoved++;
+      TotalEmptyPartialAccessesRemoved[CallNo]++;
     }
   }
+}
 
-  /// Remove statements without side effects.
-  void removeUnnecessaryStmts() {
-    auto NumStmtsBefore = S->getSize();
-    S->simplifySCoP(true);
-    assert(NumStmtsBefore >= S->getSize());
-    StmtsRemoved = NumStmtsBefore - S->getSize();
-    LLVM_DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore
-                      << ") statements\n");
-    TotalStmtsRemoved[CallNo] += StmtsRemoved;
+/// Mark all reachable instructions and access, and sweep those that are not
+/// reachable.
+void SimplifyVisitor::markAndSweep(LoopInfo *LI) {
+  DenseSet<MemoryAccess *> UsedMA;
+  DenseSet<VirtualInstruction> UsedInsts;
+
+  // Get all reachable instructions and accesses.
+  markReachable(S, LI, UsedInsts, UsedMA);
+
+  // Remove all non-reachable accesses.
+  // We need get all MemoryAccesses first, in order to not invalidate the
+  // iterators when removing them.
+  SmallVector<MemoryAccess *, 64> AllMAs;
+  for (ScopStmt &Stmt : *S)
+    AllMAs.append(Stmt.begin(), Stmt.end());
+
+  for (MemoryAccess *MA : AllMAs) {
+    if (UsedMA.count(MA))
+      continue;
+    LLVM_DEBUG(dbgs() << "Removing " << MA
+                      << " because its value is not used\n");
+    ScopStmt *Stmt = MA->getStatement();
+    Stmt->removeSingleMemoryAccess(MA);
+
+    DeadAccessesRemoved++;
+    TotalDeadAccessesRemoved[CallNo]++;
   }
 
-  /// Remove accesses that have an empty domain.
-  void removeEmptyPartialAccesses() {
-    for (ScopStmt &Stmt : *S) {
-      // Defer the actual removal to not invalidate iterators.
-      SmallVector<MemoryAccess *, 8> DeferredRemove;
+  // Remove all non-reachable instructions.
+  for (ScopStmt &Stmt : *S) {
+    // Note that for region statements, we can only remove the non-terminator
+    // instructions of the entry block. All other instructions are not in the
+    // instructions list, but implicitly always part of the statement.
+
+    SmallVector<Instruction *, 32> AllInsts(Stmt.insts_begin(),
+                                            Stmt.insts_end());
+    SmallVector<Instruction *, 32> RemainInsts;
+
+    for (Instruction *Inst : AllInsts) {
+      auto It = UsedInsts.find({&Stmt, Inst});
+      if (It == UsedInsts.end()) {
+        LLVM_DEBUG(dbgs() << "Removing "; Inst->print(dbgs());
+                   dbgs() << " because it is not used\n");
+        DeadInstructionsRemoved++;
+        TotalDeadInstructionsRemoved[CallNo]++;
+        continue;
+      }
 
-      for (MemoryAccess *MA : Stmt) {
-        if (!MA->isWrite())
-          continue;
+      RemainInsts.push_back(Inst);
 
-        isl::map AccRel = MA->getAccessRelation();
-        if (!AccRel.is_empty().is_true())
-          continue;
+      // If instructions appear multiple times, keep only the first.
+      UsedInsts.erase(It);
+    }
 
-        LLVM_DEBUG(
-            dbgs() << "Removing " << MA
-                   << " because it's a partial access that never occurs\n");
-        DeferredRemove.push_back(MA);
-      }
+    // Set the new instruction list to be only those we did not remove.
+    Stmt.setInstructions(RemainInsts);
+  }
+}
 
-      for (MemoryAccess *MA : DeferredRemove) {
-        Stmt.removeSingleMemoryAccess(MA);
-        EmptyPartialAccessesRemoved++;
-        TotalEmptyPartialAccessesRemoved[CallNo]++;
-      }
-    }
+/// Print simplification statistics to @p OS.
+void SimplifyVisitor::printStatistics(llvm::raw_ostream &OS, int Indent) const {
+  OS.indent(Indent) << "Statistics {\n";
+  OS.indent(Indent + 4) << "Empty domains removed: " << EmptyDomainsRemoved
+                        << '\n';
+  OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved << '\n';
+  OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced
+                        << "\n";
+  OS.indent(Indent + 4) << "Redundant writes removed: "
+                        << RedundantWritesRemoved << "\n";
+  OS.indent(Indent + 4) << "Accesses with empty domains removed: "
+                        << EmptyPartialAccessesRemoved << "\n";
+  OS.indent(Indent + 4) << "Dead accesses removed: " << DeadAccessesRemoved
+                        << '\n';
+  OS.indent(Indent + 4) << "Dead instructions removed: "
+                        << DeadInstructionsRemoved << '\n';
+  OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n";
+  OS.indent(Indent) << "}\n";
+}
+
+/// Print the current state of all MemoryAccesses to @p OS.
+void SimplifyVisitor::printAccesses(llvm::raw_ostream &OS, int Indent) const {
+  OS.indent(Indent) << "After accesses {\n";
+  for (auto &Stmt : *S) {
+    OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
+    for (auto *MA : Stmt)
+      MA->print(OS);
   }
+  OS.indent(Indent) << "}\n";
+}
 
-  /// Mark all reachable instructions and access, and sweep those that are not
-  /// reachable.
-  void markAndSweep(LoopInfo *LI) {
-    DenseSet<MemoryAccess *> UsedMA;
-    DenseSet<VirtualInstruction> UsedInsts;
+bool SimplifyVisitor::visit(Scop &S, LoopInfo *LI) {
+  // Reset statistics of last processed SCoP.
+  releaseMemory();
+  assert(!isModified());
 
-    // Get all reachable instructions and accesses.
-    markReachable(S, LI, UsedInsts, UsedMA);
+  // Prepare processing of this SCoP.
+  this->S = &S;
+  ScopsProcessed[CallNo]++;
 
-    // Remove all non-reachable accesses.
-    // We need get all MemoryAccesses first, in order to not invalidate the
-    // iterators when removing them.
-    SmallVector<MemoryAccess *, 64> AllMAs;
-    for (ScopStmt &Stmt : *S)
-      AllMAs.append(Stmt.begin(), Stmt.end());
+  LLVM_DEBUG(dbgs() << "Removing statements that are never executed...\n");
+  removeEmptyDomainStmts();
 
-    for (MemoryAccess *MA : AllMAs) {
-      if (UsedMA.count(MA))
-        continue;
-      LLVM_DEBUG(dbgs() << "Removing " << MA
-                        << " because its value is not used\n");
-      ScopStmt *Stmt = MA->getStatement();
-      Stmt->removeSingleMemoryAccess(MA);
+  LLVM_DEBUG(dbgs() << "Removing partial writes that never happen...\n");
+  removeEmptyPartialAccesses();
 
-      DeadAccessesRemoved++;
-      TotalDeadAccessesRemoved[CallNo]++;
-    }
+  LLVM_DEBUG(dbgs() << "Removing overwrites...\n");
+  removeOverwrites();
 
-    // Remove all non-reachable instructions.
-    for (ScopStmt &Stmt : *S) {
-      // Note that for region statements, we can only remove the non-terminator
-      // instructions of the entry block. All other instructions are not in the
-      // instructions list, but implicitly always part of the statement.
-
-      SmallVector<Instruction *, 32> AllInsts(Stmt.insts_begin(),
-                                              Stmt.insts_end());
-      SmallVector<Instruction *, 32> RemainInsts;
-
-      for (Instruction *Inst : AllInsts) {
-        auto It = UsedInsts.find({&Stmt, Inst});
-        if (It == UsedInsts.end()) {
-          LLVM_DEBUG(dbgs() << "Removing "; Inst->print(dbgs());
-                     dbgs() << " because it is not used\n");
-          DeadInstructionsRemoved++;
-          TotalDeadInstructionsRemoved[CallNo]++;
-          continue;
-        }
+  LLVM_DEBUG(dbgs() << "Coalesce partial writes...\n");
+  coalesceWrites();
 
-        RemainInsts.push_back(Inst);
+  LLVM_DEBUG(dbgs() << "Removing redundant writes...\n");
+  removeRedundantWrites();
 
-        // If instructions appear multiple times, keep only the first.
-        UsedInsts.erase(It);
-      }
+  LLVM_DEBUG(dbgs() << "Cleanup unused accesses...\n");
+  markAndSweep(LI);
 
-      // Set the new instruction list to be only those we did not remove.
-      Stmt.setInstructions(RemainInsts);
-    }
-  }
+  LLVM_DEBUG(dbgs() << "Removing statements without side effects...\n");
+  removeUnnecessaryStmts();
 
-  /// Print simplification statistics to @p OS.
-  void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const {
-    OS.indent(Indent) << "Statistics {\n";
-    OS.indent(Indent + 4) << "Empty domains removed: " << EmptyDomainsRemoved
-                          << '\n';
-    OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved
-                          << '\n';
-    OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced
-                          << "\n";
-    OS.indent(Indent + 4) << "Redundant writes removed: "
-                          << RedundantWritesRemoved << "\n";
-    OS.indent(Indent + 4) << "Accesses with empty domains removed: "
-                          << EmptyPartialAccessesRemoved << "\n";
-    OS.indent(Indent + 4) << "Dead accesses removed: " << DeadAccessesRemoved
-                          << '\n';
-    OS.indent(Indent + 4) << "Dead instructions removed: "
-                          << DeadInstructionsRemoved << '\n';
-    OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n";
-    OS.indent(Indent) << "}\n";
-  }
+  if (isModified())
+    ScopsModified[CallNo]++;
+  LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
+  LLVM_DEBUG(dbgs() << S);
 
-  /// Print the current state of all MemoryAccesses to @p OS.
-  void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const {
-    OS.indent(Indent) << "After accesses {\n";
-    for (auto &Stmt : *S) {
-      OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
-      for (auto *MA : Stmt)
-        MA->print(OS);
-    }
-    OS.indent(Indent) << "}\n";
+  auto ScopStats = S.getStatistics();
+  NumValueWrites[CallNo] += ScopStats.NumValueWrites;
+  NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops;
+  NumPHIWrites[CallNo] += ScopStats.NumPHIWrites;
+  NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops;
+  NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites;
+  NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops;
+
+  return false;
+}
+
+void SimplifyVisitor::printScop(raw_ostream &OS, Scop &S) const {
+  assert(&S == this->S &&
+         "Can only print analysis for the last processed SCoP");
+  printStatistics(OS);
+
+  if (!isModified()) {
+    OS << "SCoP could not be simplified\n";
+    return;
   }
+  printAccesses(OS);
+}
+
+void SimplifyVisitor::releaseMemory() {
+  S = nullptr;
+
+  EmptyDomainsRemoved = 0;
+  OverwritesRemoved = 0;
+  WritesCoalesced = 0;
+  RedundantWritesRemoved = 0;
+  EmptyPartialAccessesRemoved = 0;
+  DeadAccessesRemoved = 0;
+  DeadInstructionsRemoved = 0;
+  StmtsRemoved = 0;
+}
 
+class SimplifyLegacyPass : public ScopPass {
 public:
   static char ID;
-  explicit Simplify(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {}
+  SimplifyVisitor Imp;
+
+  explicit SimplifyLegacyPass(int CallNo = 0) : ScopPass(ID), Imp(CallNo) {}
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequiredTransitive<ScopInfoRegionPass>();
@@ -657,82 +692,41 @@ class Simplify : public ScopPass {
   }
 
   virtual bool runOnScop(Scop &S) override {
-    // Reset statistics of last processed SCoP.
-    releaseMemory();
-    assert(!isModified());
-
-    // Prepare processing of this SCoP.
-    this->S = &S;
-    ScopsProcessed[CallNo]++;
-
-    LLVM_DEBUG(dbgs() << "Removing statements that are never executed...\n");
-    removeEmptyDomainStmts();
-
-    LLVM_DEBUG(dbgs() << "Removing partial writes that never happen...\n");
-    removeEmptyPartialAccesses();
-
-    LLVM_DEBUG(dbgs() << "Removing overwrites...\n");
-    removeOverwrites();
-
-    LLVM_DEBUG(dbgs() << "Coalesce partial writes...\n");
-    coalesceWrites();
-
-    LLVM_DEBUG(dbgs() << "Removing redundant writes...\n");
-    removeRedundantWrites();
-
-    LLVM_DEBUG(dbgs() << "Cleanup unused accesses...\n");
-    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-    markAndSweep(LI);
-
-    LLVM_DEBUG(dbgs() << "Removing statements without side effects...\n");
-    removeUnnecessaryStmts();
-
-    if (isModified())
-      ScopsModified[CallNo]++;
-    LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
-    LLVM_DEBUG(dbgs() << S);
-
-    auto ScopStats = S.getStatistics();
-    NumValueWrites[CallNo] += ScopStats.NumValueWrites;
-    NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops;
-    NumPHIWrites[CallNo] += ScopStats.NumPHIWrites;
-    NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops;
-    NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites;
-    NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops;
-
-    return false;
+    return Imp.visit(S, &getAnalysis<LoopInfoWrapperPass>().getLoopInfo());
   }
 
   virtual void printScop(raw_ostream &OS, Scop &S) const override {
-    assert(&S == this->S &&
-           "Can only print analysis for the last processed SCoP");
-    printStatistics(OS);
-
-    if (!isModified()) {
-      OS << "SCoP could not be simplified\n";
-      return;
-    }
-    printAccesses(OS);
+    Imp.printScop(OS, S);
   }
 
-  virtual void releaseMemory() override {
-    S = nullptr;
-
-    EmptyDomainsRemoved = 0;
-    OverwritesRemoved = 0;
-    WritesCoalesced = 0;
-    RedundantWritesRemoved = 0;
-    EmptyPartialAccessesRemoved = 0;
-    DeadAccessesRemoved = 0;
-    DeadInstructionsRemoved = 0;
-    StmtsRemoved = 0;
-  }
+  virtual void releaseMemory() override { Imp.releaseMemory(); }
 };
 
-char Simplify::ID;
+char SimplifyLegacyPass::ID;
 } // anonymous namespace
 
 namespace polly {
+llvm::PreservedAnalyses SimplifyPass::run(Scop &S, ScopAnalysisManager &SAM,
+                                          ScopStandardAnalysisResults &SAR,
+                                          SPMUpdater &U) {
+  if (!Imp.visit(S, &SAR.LI))
+    return llvm::PreservedAnalyses::all();
+
+  return llvm::PreservedAnalyses::none();
+}
+
+llvm::PreservedAnalyses
+SimplifyPrinterPass::run(Scop &S, ScopAnalysisManager &SAM,
+                         ScopStandardAnalysisResults &SAR, SPMUpdater &U) {
+  bool Changed = Imp.visit(S, &SAR.LI);
+  Imp.printScop(OS, S);
+
+  if (!Changed)
+    return llvm::PreservedAnalyses::all();
+
+  return llvm::PreservedAnalyses::none();
+}
+
 SmallVector<MemoryAccess *, 32> getAccessesInOrder(ScopStmt &Stmt) {
 
   SmallVector<MemoryAccess *, 32> Accesses;
@@ -753,10 +747,12 @@ SmallVector<MemoryAccess *, 32> getAccessesInOrder(ScopStmt &Stmt) {
 }
 } // namespace polly
 
-Pass *polly::createSimplifyPass(int CallNo) { return new Simplify(CallNo); }
+Pass *polly::createSimplifyPass(int CallNo) {
+  return new SimplifyLegacyPass(CallNo);
+}
 
-INITIALIZE_PASS_BEGIN(Simplify, "polly-simplify", "Polly - Simplify", false,
-                      false)
+INITIALIZE_PASS_BEGIN(SimplifyLegacyPass, "polly-simplify", "Polly - Simplify",
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(Simplify, "polly-simplify", "Polly - Simplify", false,
-                    false)
+INITIALIZE_PASS_END(SimplifyLegacyPass, "polly-simplify", "Polly - Simplify",
+                    false, false)

diff  --git a/polly/test/Simplify/dead_access_load.ll b/polly/test/Simplify/dead_access_load.ll
index e8c501ffd01b..4739e44bade1 100644
--- a/polly/test/Simplify/dead_access_load.ll
+++ b/polly/test/Simplify/dead_access_load.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly -polly-stmt-granularity=bb "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Remove a dead load-instruction
 ; (an load whose result is not used anywhere)

diff  --git a/polly/test/Simplify/dead_access_phi.ll b/polly/test/Simplify/dead_access_phi.ll
index edd56d864d5e..10fd3179ffde 100644
--- a/polly/test/Simplify/dead_access_phi.ll
+++ b/polly/test/Simplify/dead_access_phi.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly -polly-stmt-granularity=bb "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Remove a dead PHI write/read pair
 ; (accesses that are effectively not used)

diff  --git a/polly/test/Simplify/dead_access_value.ll b/polly/test/Simplify/dead_access_value.ll
index 1cb707d93d06..b95c3843f5b1 100644
--- a/polly/test/Simplify/dead_access_value.ll
+++ b/polly/test/Simplify/dead_access_value.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly -polly-stmt-granularity=bb "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Remove a dead value write/read pair
 ; (accesses that are effectively not used)

diff  --git a/polly/test/Simplify/dead_instruction.ll b/polly/test/Simplify/dead_instruction.ll
index 31bdaa2e2d99..4989daaa6b07 100644
--- a/polly/test/Simplify/dead_instruction.ll
+++ b/polly/test/Simplify/dead_instruction.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly -polly-stmt-granularity=bb "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Remove a dead instruction
 ; (an instruction whose result is not used anywhere)

diff  --git a/polly/test/Simplify/notdead_region_exitphi.ll b/polly/test/Simplify/notdead_region_exitphi.ll
index 77c34ed8f8eb..d56549006aea 100644
--- a/polly/test/Simplify/notdead_region_exitphi.ll
+++ b/polly/test/Simplify/notdead_region_exitphi.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Do not remove dependencies of a phi node in a region's exit block.
 ;

diff  --git a/polly/test/Simplify/notdead_region_innerphi.ll b/polly/test/Simplify/notdead_region_innerphi.ll
index 5749186f4ce5..d340fc793dc3 100644
--- a/polly/test/Simplify/notdead_region_innerphi.ll
+++ b/polly/test/Simplify/notdead_region_innerphi.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Do not remove dependencies of a phi node within a region statement (%phi).
 ;

diff  --git a/polly/test/Simplify/notredundant_region_middle.ll b/polly/test/Simplify/notredundant_region_middle.ll
index e42e60b94806..46dac96ec920 100644
--- a/polly/test/Simplify/notredundant_region_middle.ll
+++ b/polly/test/Simplify/notredundant_region_middle.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Do not remove redundant stores in the middle of region statements.
 ; The store in region_true could be removed, but in practice we do try to

diff  --git a/polly/test/Simplify/notredundant_synthesizable_unknownit.ll b/polly/test/Simplify/notredundant_synthesizable_unknownit.ll
index c1dba95563ac..6a01a1c70bc4 100644
--- a/polly/test/Simplify/notredundant_synthesizable_unknownit.ll
+++ b/polly/test/Simplify/notredundant_synthesizable_unknownit.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly -polly-stmt-granularity=bb "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Do not remove the scalar value write of %i.trunc in inner.for.
 ; It is used by body.

diff  --git a/polly/test/Simplify/overwritten.ll b/polly/test/Simplify/overwritten.ll
index 09d48e73863f..8e82d3348eeb 100644
--- a/polly/test/Simplify/overwritten.ll
+++ b/polly/test/Simplify/overwritten.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-simplify -analyze < %s | FileCheck -match-full-lines %s 
+; RUN: opt %loadPolly -polly-stmt-granularity=bb "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck -match-full-lines %s 
 ;
 ; Remove a store that is overwritten by another store in the same statement.
 ;

diff  --git a/polly/test/Simplify/overwritten_3store.ll b/polly/test/Simplify/overwritten_3store.ll
index 17353c65b6c4..c1a146be1c05 100644
--- a/polly/test/Simplify/overwritten_3store.ll
+++ b/polly/test/Simplify/overwritten_3store.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-simplify -analyze < %s | FileCheck -match-full-lines %s 
+; RUN: opt %loadPolly -polly-stmt-granularity=bb "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck -match-full-lines %s 
 ;
 ; Remove a store that is overwritten by another store in the same statement.
 ; Check that even multiple stores are removed.

diff  --git a/polly/test/Simplify/overwritten_loadbetween.ll b/polly/test/Simplify/overwritten_loadbetween.ll
index c06263eb36ac..eb74910e2e98 100644
--- a/polly/test/Simplify/overwritten_loadbetween.ll
+++ b/polly/test/Simplify/overwritten_loadbetween.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck -match-full-lines %s 
+; RUN: opt %loadPolly "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck -match-full-lines %s 
 ;
 ; Do not remove overwrites when the value is read before.
 ;

diff  --git a/polly/test/Simplify/pass_existence.ll b/polly/test/Simplify/pass_existence.ll
index 833504d33522..c9b108b2390f 100644
--- a/polly/test/Simplify/pass_existence.ll
+++ b/polly/test/Simplify/pass_existence.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -disable-output "-passes=scop(print<polly-simplify>)" < %s -aa-pipeline=basic-aa < %s | FileCheck %s
 ;
 ; Simple test for the existence of the Simplify pass.
 ;

diff  --git a/polly/test/Simplify/phi_in_regionstmt.ll b/polly/test/Simplify/phi_in_regionstmt.ll
index 9ed4bc2b1e65..a64fae144443 100644
--- a/polly/test/Simplify/phi_in_regionstmt.ll
+++ b/polly/test/Simplify/phi_in_regionstmt.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; The PHINode %cond91.sink.sink.us.sink.6 is in the middle of a region
 ; statement.

diff  --git a/polly/test/Simplify/redundant.ll b/polly/test/Simplify/redundant.ll
index b3d8647ac091..4c199b171d3e 100644
--- a/polly/test/Simplify/redundant.ll
+++ b/polly/test/Simplify/redundant.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Remove redundant store (a store that writes the same value already
 ; at the destination)

diff  --git a/polly/test/Simplify/redundant_differentindex.ll b/polly/test/Simplify/redundant_differentindex.ll
index 3a5e07334cbe..e82621310c26 100644
--- a/polly/test/Simplify/redundant_differentindex.ll
+++ b/polly/test/Simplify/redundant_differentindex.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; A store that has a different index than the load it is storing is
 ; not redundant.

diff  --git a/polly/test/Simplify/redundant_storebetween.ll b/polly/test/Simplify/redundant_storebetween.ll
index 5e1befcb1a4a..df141402b22d 100644
--- a/polly/test/Simplify/redundant_storebetween.ll
+++ b/polly/test/Simplify/redundant_storebetween.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+; RUN: opt %loadPolly "-passes=scop(print<polly-simplify>)" -disable-output -aa-pipeline=basic-aa < %s | FileCheck %s -match-full-lines
 ;
 ; Don't remove store where there is another store to the same target
 ; in-between them.