[llvm] [MemorySSA] Handle MemoryDef optimized away during cloning (PR #117883)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 27 06:15:00 PST 2024


https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/117883

>From 3d9cf6b811c10e27b612b882710f5e5daba1aeaa Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 27 Nov 2024 12:56:24 +0100
Subject: [PATCH] [MemorySSA] Handle MemoryDef optimized away during cloning

When determining the replacement access during cloning, we currently
leave accesses for instructions that are not in the VMap alone.
This is correct if the instruction is not in VMap because it hasn't
been cloned, but not if it has been cloned and then removed.
In that case, we should walk up to the defining access, like in
other simplification cases.

To distinguish the two cases, pass in a callback that queries
where the instruction is part of the cloned region.

An alternative to this would be to delay removal of dead
instructions in SimpleLoopUnswitch until after MSSA cloning.
---
 llvm/include/llvm/Analysis/MemorySSAUpdater.h |  2 +
 llvm/lib/Analysis/MemorySSAUpdater.cpp        | 68 +++++++++++--------
 .../Analysis/MemorySSA/loop-rotate-update.ll  | 37 ++++++++++
 llvm/test/Analysis/MemorySSA/pr116227.ll      | 61 +++++++++++++++++
 4 files changed, 139 insertions(+), 29 deletions(-)
 create mode 100644 llvm/test/Analysis/MemorySSA/loop-rotate-update.ll
 create mode 100644 llvm/test/Analysis/MemorySSA/pr116227.ll

diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index 055feceefb0546..b925ddd5de12a7 100644
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -276,7 +276,9 @@ class MemorySSAUpdater {
   // representation for. No other cases are supported.
   void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
                         const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap,
+                        function_ref<bool(BasicBlock *)> IsInClonedRegion,
                         bool CloneWasSimplified = false);
+
   template <typename Iter>
   void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
                                             Iter ValuesBegin, Iter ValuesEnd,
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index f672bd0e1e133e..050b827388d3cf 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -565,24 +565,26 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
   return MA;
 }
 
-static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
-                                                  const ValueToValueMapTy &VMap,
-                                                  PhiToDefMap &MPhiMap,
-                                                  MemorySSA *MSSA) {
+static MemoryAccess *getNewDefiningAccessForClone(
+    MemoryAccess *MA, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap,
+    MemorySSA *MSSA, function_ref<bool(BasicBlock *BB)> IsInClonedRegion) {
   MemoryAccess *InsnDefining = MA;
   if (MemoryDef *DefMUD = dyn_cast<MemoryDef>(InsnDefining)) {
-    if (!MSSA->isLiveOnEntryDef(DefMUD)) {
-      Instruction *DefMUDI = DefMUD->getMemoryInst();
-      assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
-      if (Instruction *NewDefMUDI =
-              cast_or_null<Instruction>(VMap.lookup(DefMUDI))) {
-        InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
-        if (!InsnDefining || isa<MemoryUse>(InsnDefining)) {
-          // The clone was simplified, it's no longer a MemoryDef, look up.
-          InsnDefining = getNewDefiningAccessForClone(
-              DefMUD->getDefiningAccess(), VMap, MPhiMap, MSSA);
-        }
-      }
+    if (MSSA->isLiveOnEntryDef(DefMUD))
+      return DefMUD;
+
+    // If the MemoryDef is not part of the cloned region, leave it alone.
+    Instruction *DefMUDI = DefMUD->getMemoryInst();
+    assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+    if (!IsInClonedRegion(DefMUDI->getParent()))
+      return DefMUD;
+
+    auto *NewDefMUDI = cast_or_null<Instruction>(VMap.lookup(DefMUDI));
+    InsnDefining = NewDefMUDI ? MSSA->getMemoryAccess(NewDefMUDI) : nullptr;
+    if (!InsnDefining || isa<MemoryUse>(InsnDefining)) {
+      // The clone was simplified, it's no longer a MemoryDef, look up.
+      InsnDefining = getNewDefiningAccessForClone(
+          DefMUD->getDefiningAccess(), VMap, MPhiMap, MSSA, IsInClonedRegion);
     }
   } else {
     MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
@@ -593,10 +595,10 @@ static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
   return InsnDefining;
 }
 
-void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
-                                        const ValueToValueMapTy &VMap,
-                                        PhiToDefMap &MPhiMap,
-                                        bool CloneWasSimplified) {
+void MemorySSAUpdater::cloneUsesAndDefs(
+    BasicBlock *BB, BasicBlock *NewBB, const ValueToValueMapTy &VMap,
+    PhiToDefMap &MPhiMap, function_ref<bool(BasicBlock *)> IsInClonedRegion,
+    bool CloneWasSimplified) {
   const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
   if (!Acc)
     return;
@@ -615,7 +617,7 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
         MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
             NewInsn,
             getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap,
-                                         MPhiMap, MSSA),
+                                         MPhiMap, MSSA, IsInClonedRegion),
             /*Template=*/CloneWasSimplified ? nullptr : MUD,
             /*CreationMustSucceed=*/false);
         if (NewUseOrDef)
@@ -668,8 +670,13 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
                                            ArrayRef<BasicBlock *> ExitBlocks,
                                            const ValueToValueMapTy &VMap,
                                            bool IgnoreIncomingWithNoClones) {
-  PhiToDefMap MPhiMap;
+  SmallSetVector<BasicBlock *, 16> Blocks;
+  for (BasicBlock *BB : concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+    Blocks.insert(BB);
 
+  auto IsInClonedRegion = [&](BasicBlock *BB) { return Blocks.contains(BB); };
+
+  PhiToDefMap MPhiMap;
   auto FixPhiIncomingValues = [&](MemoryPhi *Phi, MemoryPhi *NewPhi) {
     assert(Phi && NewPhi && "Invalid Phi nodes.");
     BasicBlock *NewPhiBB = NewPhi->getBlock();
@@ -692,9 +699,10 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
         continue;
 
       // Determine incoming value and add it as incoming from IncBB.
-      NewPhi->addIncoming(
-          getNewDefiningAccessForClone(IncomingAccess, VMap, MPhiMap, MSSA),
-          IncBB);
+      NewPhi->addIncoming(getNewDefiningAccessForClone(IncomingAccess, VMap,
+                                                       MPhiMap, MSSA,
+                                                       IsInClonedRegion),
+                          IncBB);
     }
     if (auto *SingleAccess = onlySingleValue(NewPhi)) {
       MPhiMap[Phi] = SingleAccess;
@@ -716,13 +724,13 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
       MPhiMap[MPhi] = NewPhi;
     }
     // Update Uses and Defs.
-    cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
+    cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap, IsInClonedRegion);
   };
 
-  for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+  for (auto *BB : Blocks)
     ProcessBlock(BB);
 
-  for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+  for (auto *BB : Blocks)
     if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
       if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
         FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
@@ -741,7 +749,9 @@ void MemorySSAUpdater::updateForClonedBlockIntoPred(
   PhiToDefMap MPhiMap;
   if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
     MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
-  cloneUsesAndDefs(BB, P1, VM, MPhiMap, /*CloneWasSimplified=*/true);
+  cloneUsesAndDefs(
+      BB, P1, VM, MPhiMap, [&](BasicBlock *CheckBB) { return BB == CheckBB; },
+      /*CloneWasSimplified=*/true);
 }
 
 template <typename Iter>
diff --git a/llvm/test/Analysis/MemorySSA/loop-rotate-update.ll b/llvm/test/Analysis/MemorySSA/loop-rotate-update.ll
new file mode 100644
index 00000000000000..4ed4e825af5692
--- /dev/null
+++ b/llvm/test/Analysis/MemorySSA/loop-rotate-update.ll
@@ -0,0 +1,37 @@
+; RUN: opt -disable-output -passes="loop-mssa(loop-rotate),print<memoryssa>" -verify-memoryssa < %s 2>&1 | FileCheck %s
+
+; CHECK: entry:
+; CHECK-NEXT: 3 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store ptr null, ptr %p, align 8
+; CHECK-NEXT: MemoryUse(3)
+; CHECK-NEXT: %val11 = load ptr, ptr %p, align 8
+
+; CHECK: loop.latch:
+; CHECK-NEXT: 5 = MemoryPhi({loop.latch,1},{loop.latch.lr.ph,3})
+; CHECK-NEXT: MemoryUse(5)
+; CHECK-NEXT: %val2 = load ptr, ptr %p, align 8
+; CHECK-NEXT: 1 = MemoryDef(5)
+; CHECK-NEXT: store ptr null, ptr %p, align 8
+; CHECK-NEXT: MemoryUse(1)
+; CHECK-NEXT: %val1 = load ptr, ptr %p, align 8
+
+; CHECK: exit:
+; CHECK-NEXT: 4 = MemoryPhi({entry,3},{loop.exit_crit_edge,1})
+
+define void @test(ptr %p) {
+entry:
+  br label %loop
+
+loop:
+  store ptr null, ptr %p
+  %val1 = load ptr, ptr %p
+  %cmp = icmp eq ptr %val1, null
+  br i1 %cmp, label %exit, label %loop.latch
+
+loop.latch:
+  %val2 = load ptr, ptr %p
+  br label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/MemorySSA/pr116227.ll b/llvm/test/Analysis/MemorySSA/pr116227.ll
new file mode 100644
index 00000000000000..cd9686b018aa62
--- /dev/null
+++ b/llvm/test/Analysis/MemorySSA/pr116227.ll
@@ -0,0 +1,61 @@
+; RUN: opt -disable-output -passes="loop-mssa(simple-loop-unswitch<nontrivial>),print<memoryssa>" -verify-memoryssa < %s 2>&1 | FileCheck %s
+
+declare ptr @malloc() allockind("alloc,uninitialized")
+
+; CHECK-LABEL: MemorySSA for function: test
+
+; CHECK: for.body.us:
+; CHECK-NEXT: 3 = MemoryPhi({entry.split.us,liveOnEntry},{for.body.us,3})
+
+; CHECK: for.body:
+; CHECK-NEXT: 2 = MemoryPhi({entry.split,liveOnEntry},{for.body,1})
+; CHECK-NEXT: 1 = MemoryDef(2)
+; CHECK-NEXT:  %call.i = call ptr @malloc()
+
+define void @test(i1 %arg) {
+entry:
+  br label %for.body
+
+for.body:
+  %call.i = call ptr @malloc()
+  %cmp.i = icmp ne ptr %call.i, null
+  %or.cond.i = select i1 %cmp.i, i1 %arg, i1 false
+  br i1 %or.cond.i, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: MemorySSA for function: test_extra_defs
+
+; CHECK: entry:
+; CHECK-NEXT: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 1, ptr %p, align 1
+
+; CHECK: for.body.us:
+; CHECK-NEXT: 5 = MemoryPhi({entry.split.us,1},{for.body.us,6})
+; CHECK-NEXT: 6 = MemoryDef(5)
+; CHECK-NEXT: store i8 2, ptr %p, align 1
+
+; CHECK: for.body:
+; CHECK-NEXT: 4 = MemoryPhi({entry.split,1},{for.body,3})
+; CHECK-NEXT: 2 = MemoryDef(4)
+; CHECK-NEXT: store i8 2, ptr %p
+; CHECK-NEXT: 3 = MemoryDef(2)
+; CHECK-NEXT: %call.i = call ptr @malloc()
+
+define void @test_extra_defs(ptr %p, i1 %arg) {
+entry:
+  store i8 1, ptr %p
+  br label %for.body
+
+for.body:
+  store i8 2, ptr %p
+  %call.i = call ptr @malloc()
+  %cmp.i = icmp ne ptr %call.i, null
+  %or.cond.i = select i1 %cmp.i, i1 %arg, i1 false
+  br i1 %or.cond.i, label %exit, label %for.body
+
+exit:
+  ret void
+}



More information about the llvm-commits mailing list