[llvm] [MemCpyOpt] move SrcAlloca to the entry if transformation is performed (PR #67226)

Kohei Asano via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 23 01:33:34 PDT 2023


https://github.com/khei4 created https://github.com/llvm/llvm-project/pull/67226

This is a fixup for https://github.com/llvm/llvm-project/pull/66618#discussion_r1328523770 .
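The stack-move transformation is only performed after checking that the allocas are static. This patch moves SrcAlloca to the start of its basic block (the entry block, since static allocas live there) when the optimization is performed, so the check that SrcAlloca dominates every use is no longer needed.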

>From 681566e3760d3302d449b6d75ae3d500c71edbfb Mon Sep 17 00:00:00 2001
From: khei4 <kk.asano.luxy at gmail.com>
Date: Sat, 23 Sep 2023 17:28:35 +0900
Subject: [PATCH] [MemCpyOpt] move SrcAlloca to the entry if transformation is
 performed

---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 18 +++----
 llvm/test/Transforms/MemCpyOpt/stack-move.ll  | 49 ++++++++-----------
 2 files changed, 28 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index d87f2fb59814edf..2639a49fda71723 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1458,8 +1458,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
 
   // Check that src and dest are never captured, unescaped allocas. Also
   // find the nearest common dominator and postdominator for all users in
-  // order to shrink wrap the lifetimes, and instructions with noalias metadata
-  // to remove them.
+  // order to shrink wrap the lifetimes, and instructions with noalias
+  // metadata to remove them.
 
   SmallVector<Instruction *, 4> LifetimeMarkers;
   SmallSet<Instruction *, 4> NoAliasInstrs;
@@ -1483,14 +1483,6 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
       Worklist.pop_back();
       for (const Use &U : I->uses()) {
         auto *UI = cast<Instruction>(U.getUser());
-        // TODO: We can perform the transformation if we move src alloca to
-        // before the dominator of all uses. If any use that isn't dominated by
-        // SrcAlloca exists, non-dominating uses will be produced.
-        if (!DT->dominates(SrcAlloca, UI)) {
-          LLVM_DEBUG(dbgs() << "Stack Move: SrcAlloca doesn't dominate all "
-                               "uses for the location, bailing\n");
-          return false;
-        }
         if (Visited.size() >= MaxUsesToExplore) {
           LLVM_DEBUG(
               dbgs()
@@ -1600,7 +1592,11 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
     return false;
 
-  // We can do the transformation. First, align the allocas appropriately.
+  // We can do the transformation. First, move the SrcAlloca to the entry point.
+  // This is safe because any static alloca is in EntryBB.
+  SrcAlloca->moveBefore(*SrcAlloca->getParent(),
+                        SrcAlloca->getParent()->getFirstInsertionPt());
+  // Align the allocas appropriately.
   SrcAlloca->setAlignment(
       std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
 
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index f6be486dce8ac1e..a17a5530072ff82 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -94,6 +94,26 @@ define void @load_store() {
   ret void
 }
 
+define i32 @use_not_dominated_by_src_alloca() {
+; CHECK-LABEL: define i32 @use_not_dominated_by_src_alloca() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, align 4
+; CHECK-NEXT:    [[DEST_GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 -1
+; CHECK-NEXT:    [[DEST_USE:%.*]] = load i8, ptr [[DEST_GEP]], align 1
+; CHECK-NEXT:    ret i32 0
+;
+  %dest = alloca i1, align 1
+  ; Replacing the use of dest with src causes no domination uses.
+  %dest.gep = getelementptr i64, ptr %dest, i64 -1
+  %dest.use = load i8, ptr %dest.gep, align 1
+  %src = alloca i8, align 4
+  %src.val = load i1, ptr %src, align 4
+
+  store i1 %src.val, ptr %dest, align 1
+
+  ret i32 0
+}
+
+
 ; Tests that merging two allocas shouldn't be more poisonous, smaller aligned src is valid.
 define void @align_up() {
 ; CHECK-LABEL: define void @align_up() {
@@ -718,13 +738,12 @@ unr2:
 
 }
 
-; TODO: merge allocas for multi basicblock loop case.
 define void @multi_bb_loop(i32 %n) {
 ; CHECK-LABEL: define void @multi_bb_loop
 ; CHECK-SAME: (i32 [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[NLT1:%.*]] = icmp slt i32 [[N]], 1
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT:    [[NLT1:%.*]] = icmp slt i32 [[N]], 1
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4
 ; CHECK-NEXT:    br i1 [[NLT1]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]]
 ; CHECK:       loop_body:
@@ -981,32 +1000,6 @@ bb2:
 
 ; Optimization failures follow:
 
-; TODO: we can merge those alloca if we move src alloca to the start of the BB.
-; Tests that a the optimization isn't performed,
-; when any use that isn't dominated by SrcAlloca exists.
-define i32 @use_not_dominated_by_src_alloca() {
-; CHECK-LABEL: define i32 @use_not_dominated_by_src_alloca() {
-; CHECK-NEXT:    [[DEST:%.*]] = alloca i1, align 1
-; CHECK-NEXT:    [[DEST_GEP:%.*]] = getelementptr i64, ptr [[DEST]], i64 -1
-; CHECK-NEXT:    [[DEST_USE:%.*]] = load i8, ptr [[DEST_GEP]], align 1
-; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, align 4
-; CHECK-NEXT:    [[SRC_VAL:%.*]] = load i1, ptr [[SRC]], align 4
-; CHECK-NEXT:    store i1 [[SRC_VAL]], ptr [[DEST]], align 1
-; CHECK-NEXT:    ret i32 0
-;
-  %dest = alloca i1, align 1
-  ; Replacing the use of dest with src causes no domination uses.
-  %dest.gep = getelementptr i64, ptr %dest, i64 -1
-  %dest.use = load i8, ptr %dest.gep, align 1
-  %src = alloca i8, align 4
-  %src.val = load i1, ptr %src, align 4
-
-  store i1 %src.val, ptr %dest, align 1
-
-  ret i32 0
-}
-
-
 ; Tests that a memcpy that doesn't completely overwrite a stack value is a use
 ; for the purposes of liveness analysis, not a definition.
 define void @incomplete_memcpy() {



More information about the llvm-commits mailing list