[llvm] [GlobalOpt] Refactor CleanupPointerRootUsers (PR #84694)

Anshil Gandhi via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 1 11:13:00 PDT 2024


https://github.com/gandhi56 updated https://github.com/llvm/llvm-project/pull/84694

>From b71057a812e6b9e2c567ca12d86732cfffc1b9ab Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <gandhi21299 at gmail.com>
Date: Sun, 10 Mar 2024 14:29:37 -0600
Subject: [PATCH 1/2] [GlobalOpt] Refactor CleanupPointerRootUsers

Currently the algorithm combines removal of stores
and store operands to GV in a single loop. This patch
refines the current algorithm to account for certain
edge cases.

Fixes https://github.com/llvm/llvm-project/issues/64680.
---
 llvm/lib/Transforms/IPO/GlobalOpt.cpp         | 80 ++++++++-----------
 .../GlobalOpt/cleanup-pointer-root-stores.ll  | 62 ++++++++++++++
 .../Transforms/GlobalOpt/dead-store-status.ll |  2 -
 3 files changed, 97 insertions(+), 47 deletions(-)
 create mode 100644 llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-stores.ll

diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index da714c9a75701b..771b09cfa8d979 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -201,61 +201,51 @@ CleanupPointerRootUsers(GlobalVariable *GV,
 
   bool Changed = false;
 
-  // If Dead[n].first is the only use of a malloc result, we can delete its
-  // chain of computation and the store to the global in Dead[n].second.
-  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
-
+  // Collect all stores to GV.
+  SmallVector<Instruction *, 8> Writes;
   SmallVector<User *> Worklist(GV->users());
-  // Constants can't be pointers to dynamically allocated memory.
   while (!Worklist.empty()) {
     User *U = Worklist.pop_back_val();
-    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      Value *V = SI->getValueOperand();
-      if (isa<Constant>(V)) {
-        Changed = true;
-        SI->eraseFromParent();
-      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
-        if (I->hasOneUse())
-          Dead.push_back(std::make_pair(I, SI));
-      }
-    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
-      if (isa<Constant>(MSI->getValue())) {
-        Changed = true;
-        MSI->eraseFromParent();
-      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
-        if (I->hasOneUse())
-          Dead.push_back(std::make_pair(I, MSI));
-      }
-    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
-      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
-      if (MemSrc && MemSrc->isConstant()) {
-        Changed = true;
-        MTI->eraseFromParent();
-      } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
-        if (I->hasOneUse())
-          Dead.push_back(std::make_pair(I, MTI));
-      }
-    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+    if (auto *SI = dyn_cast<StoreInst>(U)) {
+      Writes.push_back(SI);
+    } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
       if (isa<GEPOperator>(CE))
         append_range(Worklist, CE->users());
     }
   }
 
-  for (int i = 0, e = Dead.size(); i != e; ++i) {
-    if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
-      Dead[i].second->eraseFromParent();
-      Instruction *I = Dead[i].first;
-      do {
-        if (isAllocationFn(I, GetTLI))
-          break;
-        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
-        if (!J)
-          break;
-        I->eraseFromParent();
-        I = J;
-      } while (true);
+  auto RemoveComputationChain = [&GetTLI](Instruction *I) {
+    do {
+      if (isAllocationFn(I, GetTLI))
+        break;
+      Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
+      if (!J)
+        break;
       I->eraseFromParent();
+      I = J;
+    } while (true);
+    I->eraseFromParent();
+  };
+
+  // Remove the store if it's value operand is a constant or a computation
+  // chain. If the value operand is a computation chain, determine if it is safe
+  // to remove it before removing the store. Remove both the store and the
+  // computation chain if it is safe to do so.
+  while (!Writes.empty()) {
+    auto *Write = Writes.pop_back_val();
+    auto *V = Write->getOperand(0);
+    if (isa<Constant>(V)) {
       Changed = true;
+      Write->eraseFromParent();
+    } else if (isa<Instruction>(V)) {
+      if (IsSafeComputationToRemove(V, GetTLI)) {
+        Changed = true;
+        Write->eraseFromParent();
+        RemoveComputationChain(cast<Instruction>(V));
+      } else {
+        Changed = true;
+        Write->eraseFromParent();
+      }
     }
   }
 
diff --git a/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-stores.ll b/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-stores.ll
new file mode 100644
index 00000000000000..980fb6f5dc8bfe
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-stores.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=globalopt < %s -S | FileCheck %s
+
+ at a = internal unnamed_addr global i32 0, align 4
+ at b = internal unnamed_addr global [3 x ptr] zeroinitializer, align 16
+
+define i32 @main() local_unnamed_addr {
+; CHECK-LABEL: define i32 @main() local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[DOTPR:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[DOTPR]], 3
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    store i32 8, ptr [[E]], align 4
+; CHECK-NEXT:    call void @bar20_()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[E]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[INC]], ptr @a, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 2
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %e = alloca i32, align 4
+  %.pr = load i32, ptr @a, align 4
+  %cmp1 = icmp slt i32 %.pr, 3
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %if.end
+  store i32 8, ptr %e, align 4
+  call void @bar20_()
+  %0 = load i32, ptr %e, align 4
+  %tobool.not = icmp eq i32 %0, 0
+  br i1 %tobool.not, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  store ptr %e, ptr getelementptr inbounds ([3 x ptr], ptr @b, i64 0, i64 2), align 16
+  %1 = load i32, ptr @a, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, ptr @a, align 4
+  %cmp = icmp slt i32 %1, 2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %if.end, %entry
+  ret i32 0
+}
+
+declare void @bar20_() local_unnamed_addr
+declare void @foo() local_unnamed_addr
diff --git a/llvm/test/Transforms/GlobalOpt/dead-store-status.ll b/llvm/test/Transforms/GlobalOpt/dead-store-status.ll
index 9a8fbb8d65f0e0..597c08929af902 100644
--- a/llvm/test/Transforms/GlobalOpt/dead-store-status.ll
+++ b/llvm/test/Transforms/GlobalOpt/dead-store-status.ll
@@ -4,8 +4,6 @@
 ; false. This was caught by the pass return status check that is hidden under
 ; EXPENSIVE_CHECKS.
 
-; CHECK: @global = internal unnamed_addr global ptr null, align 1
-
 ; CHECK-LABEL: @foo
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: ret i16 undef

>From c75b05d6c69e37d2b1bac55e46b994ea25b08e3d Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <gandhi21299 at gmail.com>
Date: Mon, 1 Apr 2024 12:12:32 -0600
Subject: [PATCH 2/2] Support memintrinsics

---
 llvm/lib/Transforms/IPO/GlobalOpt.cpp         |  3 ++
 .../GlobalOpt/cleanup-pointer-root-loads.ll   | 42 +++++++++++++++++++
 .../GlobalOpt/cleanup-pointer-root-stores.ll  |  7 ++++
 3 files changed, 52 insertions(+)
 create mode 100644 llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-loads.ll

diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 771b09cfa8d979..77dc1d91b4273c 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -211,6 +211,9 @@ CleanupPointerRootUsers(GlobalVariable *GV,
     } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
       if (isa<GEPOperator>(CE))
         append_range(Worklist, CE->users());
+    } else if (auto *MI = dyn_cast<MemIntrinsic>(U)) {
+      if (MI->getRawDest() == GV)
+        Writes.push_back(MI);
     }
   }
 
diff --git a/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-loads.ll b/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-loads.ll
new file mode 100644
index 00000000000000..4c0aad646efcf2
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-loads.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=globalopt < %s -S | FileCheck %s
+
+ at gv = internal unnamed_addr global [3 x ptr] zeroinitializer, align 16
+ at gv2 = internal unnamed_addr global i32 0, align 4
+
+;; This test includes a load from @gv. No stores
+;; or memintrinsics with destination @gv should be removed.
+define i32 @main_with_load_from_b() local_unnamed_addr {
+; CHECK-LABEL: define i32 @main_with_load_from_b() local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store ptr [[E]], ptr getelementptr inbounds ([3 x ptr], ptr @gv, i64 0, i64 2), align 16
+; CHECK-NEXT:    [[LOAD_B:%.*]] = load ptr, ptr getelementptr inbounds ([3 x ptr], ptr @gv, i64 0, i64 2), align 16
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr @gv, ptr null, i64 8, i1 false)
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %e = alloca i32, align 4
+  store ptr %e, ptr getelementptr inbounds ([3 x ptr], ptr @gv, i64 0, i64 2), align 16
+  %load.b = load ptr, ptr getelementptr inbounds ([3 x ptr], ptr @gv, i64 0, i64 2), align 16
+  call void @llvm.memcpy.p0i8.p0i8.i64(ptr getelementptr inbounds ([3 x ptr], ptr @gv, i64 0, i64 0), ptr null, i64 8, i1 false)
+  ret i32 0
+}
+
+;; This test includes a memcpy with @c as it's source and destination
+;; operands. CleanupPointerRootUsers is not called in this case.
+define i32 @main_with_load_store_c() local_unnamed_addr {
+; CHECK-LABEL: define i32 @main_with_load_store_c() local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr @gv2, ptr @gv2, i64 4, i1 false)
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %e = alloca i32, align 4
+  call void @llvm.memcpy.p0i8.p0i8.i64(ptr @gv2, ptr @gv2, i64 4, i1 false)
+  ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(ptr nocapture, ptr nocapture readonly, i64, i1) local_unnamed_addr
+declare void @llvm.memset.p0i8.i64(ptr nocapture, i8, i64, i1) local_unnamed_addr
diff --git a/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-stores.ll b/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-stores.ll
index 980fb6f5dc8bfe..54ca00e64d18b2 100644
--- a/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-stores.ll
+++ b/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-stores.ll
@@ -4,6 +4,9 @@
 @a = internal unnamed_addr global i32 0, align 4
 @b = internal unnamed_addr global [3 x ptr] zeroinitializer, align 16
 
+;; This test is extracted from the issue reported in #64680, with an
+;; additional memcpy and a memset. Ensure all stores and memintrinsics with
+;; destination @b are removed as @b is dead.
 define i32 @main() local_unnamed_addr {
 ; CHECK-LABEL: define i32 @main() local_unnamed_addr {
 ; CHECK-NEXT:  entry:
@@ -52,11 +55,15 @@ if.end:                                           ; preds = %if.then, %for.body
   %inc = add nsw i32 %1, 1
   store i32 %inc, ptr @a, align 4
   %cmp = icmp slt i32 %1, 2
+  call void @llvm.memset.p0i8.i64(ptr getelementptr inbounds ([3 x ptr], ptr @b, i64 0, i64 0), i8 0, i64 8, i1 false)
   br i1 %cmp, label %for.body, label %for.end
 
 for.end:                                          ; preds = %if.end, %entry
+  call void @llvm.memcpy.p0i8.p0i8.i64(ptr getelementptr inbounds ([3 x ptr], ptr @b, i64 0, i64 0), ptr null, i64 8, i1 false)
   ret i32 0
 }
 
 declare void @bar20_() local_unnamed_addr
 declare void @foo() local_unnamed_addr
+declare void @llvm.memcpy.p0i8.p0i8.i64(ptr nocapture, ptr nocapture readonly, i64, i1) local_unnamed_addr
+declare void @llvm.memset.p0i8.i64(ptr nocapture, i8, i64, i1) local_unnamed_addr



More information about the llvm-commits mailing list