[llvm] afa13ba - Reapply Move "auto-init" instructions to the dominator of their users

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 24 09:13:29 PDT 2023


Author: serge-sans-paille
Date: 2023-04-24T18:10:10+02:00
New Revision: afa13ba18d9e0eb0de4cccb7061222fe900b732f

URL: https://github.com/llvm/llvm-project/commit/afa13ba18d9e0eb0de4cccb7061222fe900b732f
DIFF: https://github.com/llvm/llvm-project/commit/afa13ba18d9e0eb0de4cccb7061222fe900b732f.diff

LOG: Reapply Move "auto-init" instructions to the dominator of their users

Original patch (50b2a113db197a97f60ad2aace8b7382dc9b8c31) ignored the
fact that -ftrivial-auto-var-init could affect function parameters with
the sret attribute.
Just do not move instructions that don't affect an alloca.
Also add missing test case for volatile instruction.

Differential Revision: https://reviews.llvm.org/D148507

Added: 
    llvm/include/llvm/Transforms/Utils/MoveAutoInit.h
    llvm/lib/Transforms/Utils/MoveAutoInit.cpp
    llvm/test/Transforms/MoveAutoInit/branch.ll
    llvm/test/Transforms/MoveAutoInit/clobber.ll
    llvm/test/Transforms/MoveAutoInit/fence.ll
    llvm/test/Transforms/MoveAutoInit/loop.ll
    llvm/test/Transforms/MoveAutoInit/scalar.ll
    llvm/test/Transforms/MoveAutoInit/sret.ll
    llvm/test/Transforms/MoveAutoInit/volatile.ll

Modified: 
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/lib/Passes/PassRegistry.def
    llvm/lib/Transforms/Utils/CMakeLists.txt
    llvm/test/Other/new-pm-defaults.ll
    llvm/test/Other/new-pm-lto-defaults.ll
    llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
    llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
    llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
    llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
    llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
    llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h b/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h
new file mode 100644
index 0000000000000..980b55f46f114
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h
@@ -0,0 +1,29 @@
+//===- MoveAutoInit.h - Move insts marked as auto-init Pass --*- C++ -*-======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions marked as auto-init closer to their use if
+// profitable, generally because it moves them under a guard, potentially
+// skipping the overhead of the auto-init under some execution paths.
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
+#define LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class MoveAutoInitPass : public PassInfoMixin<MoveAutoInitPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a7d65aa8f5088..55fc78d7f6cb0 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -245,6 +245,7 @@
 #include "llvm/Transforms/Utils/LowerSwitch.h"
 #include "llvm/Transforms/Utils/Mem2Reg.h"
 #include "llvm/Transforms/Utils/MetaRenamer.h"
+#include "llvm/Transforms/Utils/MoveAutoInit.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
 #include "llvm/Transforms/Utils/PredicateInfo.h"
 #include "llvm/Transforms/Utils/RelLookupTableConverter.h"

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 4407ad0131e12..2b392f128243f 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -124,6 +124,7 @@
 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
 #include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/Transforms/Utils/MoveAutoInit.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
 #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
 #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
@@ -696,6 +697,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(MemCpyOptPass());
 
   FPM.addPass(DSEPass());
+  FPM.addPass(MoveAutoInitPass());
+
   FPM.addPass(createFunctionToLoopPassAdaptor(
       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                /*AllowSpeculation=*/true),
@@ -1809,6 +1812,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
 
   // Nuke dead stores.
   MainFPM.addPass(DSEPass());
+  MainFPM.addPass(MoveAutoInitPass());
   MainFPM.addPass(MergedLoadStoreMotionPass());
 
   LoopPassManager LPM;

diff  --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 891221d534526..2c760adc1a409 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -331,6 +331,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
 FUNCTION_PASS("memcpyopt", MemCpyOptPass())
 FUNCTION_PASS("mergeicmps", MergeICmpsPass())
 FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
+FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
 FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
 FUNCTION_PASS("newgvn", NewGVNPass())
 FUNCTION_PASS("jump-threading", JumpThreadingPass())

diff  --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 6663ce6c4b0ac..a870071f3f641 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_component_library(LLVMTransformUtils
   MetaRenamer.cpp
   MisExpect.cpp
   ModuleUtils.cpp
+  MoveAutoInit.cpp
   NameAnonGlobals.cpp
   PredicateInfo.cpp
   PromoteMemoryToRegister.cpp

diff  --git a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
new file mode 100644
index 0000000000000..21249bd2cf83c
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
@@ -0,0 +1,233 @@
+//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions marked as auto-init closer to the basic block
+// that uses them, so they may be skipped on some control paths of the function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MoveAutoInit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "move-auto-init"
+
+STATISTIC(NumMoved, "Number of instructions moved");
+
+static cl::opt<unsigned> MoveAutoInitThreshold(
+    "move-auto-init-threshold", cl::Hidden, cl::init(128),
+    cl::desc("Maximum instructions to analyze per moved initialization"));
+
+static bool hasAutoInitMetadata(const Instruction &I) {
+  return I.hasMetadata(LLVMContext::MD_annotation) &&
+         any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(),
+                [](const MDOperand &Op) {
+                  return cast<MDString>(Op.get())->getString() == "auto-init";
+                });
+}
+
+static std::optional<MemoryLocation> writeToAlloca(const Instruction &I) {
+  MemoryLocation ML;
+  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
+    ML = MemoryLocation::getForDest(MI);
+  else if (auto *SI = dyn_cast<StoreInst>(&I))
+    ML = MemoryLocation::get(SI);
+  else
+    assert(false && "memory location set");
+
+  if (isa<AllocaInst>(getUnderlyingObject(ML.Ptr)))
+    return ML;
+  else
+    return {};
+}
+
+/// Finds a BasicBlock in the CFG where instruction `I` can be moved to while
+/// not changing the Memory SSA ordering and being guarded by at least one
+/// condition.
+static BasicBlock *usersDominator(const MemoryLocation &ML, Instruction *I,
+                                  DominatorTree &DT, MemorySSA &MSSA) {
+  BasicBlock *CurrentDominator = nullptr;
+  MemoryUseOrDef &IMA = *MSSA.getMemoryAccess(I);
+  BatchAAResults AA(MSSA.getAA());
+
+  SmallPtrSet<MemoryAccess *, 8> Visited;
+
+  auto AsMemoryAccess = [](User *U) { return cast<MemoryAccess>(U); };
+  SmallVector<MemoryAccess *> WorkList(map_range(IMA.users(), AsMemoryAccess));
+
+  while (!WorkList.empty()) {
+    MemoryAccess *MA = WorkList.pop_back_val();
+    if (!Visited.insert(MA).second)
+      continue;
+
+    if (Visited.size() > MoveAutoInitThreshold)
+      return nullptr;
+
+    bool FoundClobberingUser = false;
+    if (auto *M = dyn_cast<MemoryUseOrDef>(MA)) {
+      Instruction *MI = M->getMemoryInst();
+
+      // If this memory instruction may not clobber `I`, we can skip it.
+      // LifetimeEnd is a valid user, but we do not want it in the user
+      // dominator.
+      if (AA.getModRefInfo(MI, ML) != ModRefInfo::NoModRef &&
+          !MI->isLifetimeStartOrEnd() && MI != I) {
+        FoundClobberingUser = true;
+        CurrentDominator = CurrentDominator
+                               ? DT.findNearestCommonDominator(CurrentDominator,
+                                                               MI->getParent())
+                               : MI->getParent();
+      }
+    }
+    if (!FoundClobberingUser) {
+      auto UsersAsMemoryAccesses = map_range(MA->users(), AsMemoryAccess);
+      append_range(WorkList, UsersAsMemoryAccesses);
+    }
+  }
+  return CurrentDominator;
+}
+
+static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) {
+  BasicBlock &EntryBB = F.getEntryBlock();
+  SmallVector<std::pair<Instruction *, BasicBlock *>> JobList;
+
+  //
+  // Compute movable instructions.
+  //
+  for (Instruction &I : EntryBB) {
+    if (!hasAutoInitMetadata(I))
+      continue;
+
+    std::optional<MemoryLocation> ML = writeToAlloca(I);
+    if (!ML)
+      continue;
+
+    if (I.isVolatile())
+      continue;
+
+    BasicBlock *UsersDominator = usersDominator(ML.value(), &I, DT, MSSA);
+    if (!UsersDominator)
+      continue;
+
+    if (UsersDominator == &EntryBB)
+      continue;
+
+    // Traverse the CFG to detect cycles `UsersDominator` would be part of.
+    SmallPtrSet<BasicBlock *, 8> TransitiveSuccessors;
+    SmallVector<BasicBlock *> WorkList(successors(UsersDominator));
+    bool HasCycle = false;
+    while (!WorkList.empty()) {
+      BasicBlock *CurrBB = WorkList.pop_back_val();
+      if (CurrBB == UsersDominator)
+        // No early exit because we want to compute the full set of transitive
+        // successors.
+        HasCycle = true;
+      for (BasicBlock *Successor : successors(CurrBB)) {
+        if (!TransitiveSuccessors.insert(Successor).second)
+          continue;
+        WorkList.push_back(Successor);
+      }
+    }
+
+    // Don't insert if that could create multiple executions of I,
+    // but we can insert it in the non back-edge predecessors, if they exist.
+    if (HasCycle) {
+      BasicBlock *UsersDominatorHead = UsersDominator;
+      while (BasicBlock *UniquePredecessor =
+                 UsersDominatorHead->getUniquePredecessor())
+        UsersDominatorHead = UniquePredecessor;
+
+      if (UsersDominatorHead == &EntryBB)
+        continue;
+
+      BasicBlock *DominatingPredecessor = nullptr;
+      for (BasicBlock *Pred : predecessors(UsersDominatorHead)) {
+        // If one of the predecessors of the dominator also transitively is a
+        // successor, moving to the dominator would do the inverse of loop
+        // hoisting, and we don't want that.
+        if (TransitiveSuccessors.count(Pred))
+          continue;
+
+        DominatingPredecessor =
+            DominatingPredecessor
+                ? DT.findNearestCommonDominator(DominatingPredecessor, Pred)
+                : Pred;
+      }
+
+      if (!DominatingPredecessor || DominatingPredecessor == &EntryBB)
+        continue;
+
+      UsersDominator = DominatingPredecessor;
+    }
+
+    // CatchSwitchInst blocks can only have one instruction, so they are not
+    // good candidates for insertion.
+    while (isa<CatchSwitchInst>(UsersDominator->getFirstInsertionPt())) {
+      for (BasicBlock *Pred : predecessors(UsersDominator))
+        UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred);
+    }
+
+    // We finally found a place where I can be moved while not introducing extra
+    // execution, and guarded by at least one condition.
+    if (UsersDominator != &EntryBB)
+      JobList.emplace_back(&I, UsersDominator);
+  }
+
+  //
+  // Perform the actual substitution.
+  //
+  if (JobList.empty())
+    return false;
+
+  MemorySSAUpdater MSSAU(&MSSA);
+
+  // Reverse insertion to respect relative order between instructions:
+  // if two instructions are moved from the same BB to the same BB, we insert
+  // the second one in the front, then the first on top of it.
+  for (auto &Job : reverse(JobList)) {
+    Job.first->moveBefore(&*Job.second->getFirstInsertionPt());
+    MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(),
+                      MemorySSA::InsertionPlace::Beginning);
+  }
+
+  if (VerifyMemorySSA)
+    MSSA.verifyMemorySSA();
+
+  NumMoved += JobList.size();
+
+  return true;
+}
+
+PreservedAnalyses MoveAutoInitPass::run(Function &F,
+                                        FunctionAnalysisManager &AM) {
+
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+  if (!runMoveAutoInit(F, DT, MSSA))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<MemorySSAAnalysis>();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}

diff  --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index c4e32e6878d26..ddde17fb7e0c7 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -205,6 +205,7 @@
 ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
 ; CHECK-O23SZ-NEXT: Running pass: LICMPass

diff  --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index eb4ad0610b326..7f0b335b867d0 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -106,6 +106,7 @@
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
 ; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo

diff  --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index e99250d768aa5..d8ce164628eed 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -146,6 +146,7 @@
 ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
 ; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop

diff  --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index e460b541ac5b1..781911b7be977 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -133,6 +133,7 @@
 ; CHECK-O-NEXT: Running pass: ADCEPass
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
 ; CHECK-O23SZ-NEXT: Running pass: LICMPass

diff  --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index a6dbe5d0a0ef9..46fd7f1a12a19 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -140,6 +140,7 @@
 ; CHECK-O-NEXT: Running pass: ADCEPass
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
 ; CHECK-O23SZ-NEXT: Running pass: LICMPass

diff  --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index 8baca2bd0c484..2ad8e0dde2d9f 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -146,6 +146,7 @@
 ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
 ; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop

diff  --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index 648402c911a57..fe68369b7238e 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -171,6 +171,7 @@
 ; CHECK-O-NEXT: Running pass: ADCEPass
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
 ; CHECK-O23SZ-NEXT: Running pass: LICMPass

diff  --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index 8ad725c2d15d4..81f326fe70f7c 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -136,6 +136,7 @@
 ; CHECK-O-NEXT: Running pass: ADCEPass
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
 ; CHECK-O23SZ-NEXT: Running pass: LICMPass

diff  --git a/llvm/test/Transforms/MoveAutoInit/branch.ll b/llvm/test/Transforms/MoveAutoInit/branch.ll
new file mode 100644
index 0000000000000..6c51e33c8c8e9
--- /dev/null
+++ b/llvm/test/Transforms/MoveAutoInit/branch.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
+
+@__const.foo.buffer = private unnamed_addr constant [8 x i32] [i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766], align 16
+
+define void @foo(i32 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BUFFER:%.*]] = alloca [8 x i32], align 16
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[BUFFER]], ptr align 16 @__const.foo.buffer, i64 32, i1 false), !annotation !0
+; CHECK-NEXT:    call void @dump(ptr [[BUFFER]])
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+
+entry:
+  %buffer = alloca [8 x i32], align 16
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %buffer, ptr align 16 @__const.foo.buffer, i64 32, i1 false), !annotation !0
+  %tobool = icmp ne i32 %x, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @dump(ptr %buffer)
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+declare void @dump(ptr)
+
+!0 = !{!"auto-init"}
+

diff  --git a/llvm/test/Transforms/MoveAutoInit/clobber.ll b/llvm/test/Transforms/MoveAutoInit/clobber.ll
new file mode 100644
index 0000000000000..0d70d85119f50
--- /dev/null
+++ b/llvm/test/Transforms/MoveAutoInit/clobber.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Checks that move-auto-init can move instructions past non-clobbering memory
+; instructions.
+; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @foo(i32 noundef %0, i32 noundef %1, i32 noundef %2) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[TMP4:%.*]] = alloca [100 x i8], align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = alloca [2 x i8], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [100 x i8], ptr [[TMP4]], i64 0, i64 0
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 100, ptr nonnull [[TMP6]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8], ptr [[TMP5]], i64 0, i64 0
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 2, ptr nonnull [[TMP7]]) #[[ATTR3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i8], ptr [[TMP5]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP1:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP9]], label [[TMP15:%.*]], label [[TMP10:%.*]]
+; CHECK:       10:
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(100) [[TMP6]], i8 -86, i64 100, i1 false), !annotation !0
+; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP0:%.*]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [100 x i8], ptr [[TMP4]], i64 0, i64 [[TMP11]]
+; CHECK-NEXT:    store i8 12, ptr [[TMP12]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP6]], align 16
+; CHECK-NEXT:    [[TMP14:%.*]] = sext i8 [[TMP13]] to i32
+; CHECK-NEXT:    br label [[TMP22:%.*]]
+; CHECK:       15:
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP2:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label [[TMP22]], label [[TMP17:%.*]]
+; CHECK:       17:
+; CHECK-NEXT:    store i8 -86, ptr [[TMP7]], align 1, !annotation !0
+; CHECK-NEXT:    store i8 -86, ptr [[TMP8]], align 1, !annotation !0
+; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [2 x i8], ptr [[TMP5]], i64 0, i64 [[TMP18]]
+; CHECK-NEXT:    store i8 12, ptr [[TMP19]], align 1
+; CHECK-NEXT:    [[TMP20:%.*]] = load i8, ptr [[TMP7]], align 1
+; CHECK-NEXT:    [[TMP21:%.*]] = sext i8 [[TMP20]] to i32
+; CHECK-NEXT:    br label [[TMP22]]
+; CHECK:       22:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ [[TMP14]], [[TMP10]] ], [ [[TMP21]], [[TMP17]] ], [ 0, [[TMP15]] ]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[TMP7]]) #[[ATTR3]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 100, ptr nonnull [[TMP6]]) #[[ATTR3]]
+; CHECK-NEXT:    ret i32 [[TMP23]]
+;
+
+  %4 = alloca [100 x i8], align 16
+  %5 = alloca [2 x i8], align 1
+  %6 = getelementptr inbounds [100 x i8], [100 x i8]* %4, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 100, i8* nonnull %6) #3
+  ; This memset must move.
+  call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 16 dereferenceable(100) %6, i8 -86, i64 100, i1 false), !annotation !0
+  %7 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %7) #3
+  ; This store must move.
+  store i8 -86, i8* %7, align 1, !annotation !0
+  %8 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 1
+  ; This store must move.
+  store i8 -86, i8* %8, align 1, !annotation !0
+  %9 = icmp eq i32 %1, 0
+  br i1 %9, label %15, label %10
+
+10:
+  %11 = sext i32 %0 to i64
+  %12 = getelementptr inbounds [100 x i8], [100 x i8]* %4, i64 0, i64 %11
+  store i8 12, i8* %12, align 1
+  %13 = load i8, i8* %6, align 16
+  %14 = sext i8 %13 to i32
+  br label %22
+
+15:
+  %16 = icmp eq i32 %2, 0
+  br i1 %16, label %22, label %17
+
+17:
+  %18 = sext i32 %0 to i64
+  %19 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 %18
+  store i8 12, i8* %19, align 1
+  %20 = load i8, i8* %7, align 1
+  %21 = sext i8 %20 to i32
+  br label %22
+
+22:
+  %23 = phi i32 [ %14, %10 ], [ %21, %17 ], [ 0, %15 ]
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %7) #3
+  call void @llvm.lifetime.end.p0i8(i64 100, i8* nonnull %6) #3
+  ret i32 %23
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+attributes #0 = { mustprogress nofree nosync nounwind readnone uwtable willreturn }
+attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
+attributes #2 = { argmemonly mustprogress nofree nounwind willreturn writeonly }
+attributes #3 = { nounwind }
+
+!0 = !{!"auto-init"}

diff  --git a/llvm/test/Transforms/MoveAutoInit/fence.ll b/llvm/test/Transforms/MoveAutoInit/fence.ll
new file mode 100644
index 0000000000000..bce9c136942e9
--- /dev/null
+++ b/llvm/test/Transforms/MoveAutoInit/fence.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; In that case, the store to %val happens before the fence and cannot go past
+; it.
+define void @foo(i32 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 -1431655766, ptr [[VAL]], align 4, !annotation !0
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    fence acquire
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @dump(ptr [[VAL]])
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val = alloca i32, align 4
+  store i32 -1431655766, ptr %val, align 4, !annotation !0
+  %tobool = icmp ne i32 %x, 0
+  fence acquire
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @dump(ptr %val)
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; In that case, the store to %val happens after the fence and it is moved within
+; the true branch as expected.
+define void @bar(i32 %x) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    fence acquire
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i32 -1431655766, ptr [[VAL]], align 4, !annotation !0
+; CHECK-NEXT:    call void @dump(ptr [[VAL]])
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val = alloca i32, align 4
+  %tobool = icmp ne i32 %x, 0
+  fence acquire
+  store i32 -1431655766, ptr %val, align 4, !annotation !0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @dump(ptr %val)
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @dump(ptr)
+
+!0 = !{!"auto-init"}

diff  --git a/llvm/test/Transforms/MoveAutoInit/loop.ll b/llvm/test/Transforms/MoveAutoInit/loop.ll
new file mode 100644
index 0000000000000..71153e58f4e35
--- /dev/null
+++ b/llvm/test/Transforms/MoveAutoInit/loop.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i32 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BUFFER:%.*]] = alloca [80 x i32], align 16
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[BUFFER]], i8 -86, i64 320, i1 false), !annotation !0
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[DO_COND:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [80 x i32], ptr [[BUFFER]], i64 0, i64 0
+; CHECK-NEXT:    call void @dump(ptr [[ARRAYIDX]])
+; CHECK-NEXT:    br label [[DO_COND]]
+; CHECK:       do.cond:
+; CHECK-NEXT:    [[DEC]] = add nsw i32 [[X_ADDR_0]], -1
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X_ADDR_0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[DO_BODY]], label [[DO_END:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+;
+
+entry:
+  %buffer = alloca [80 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %buffer, i8 -86, i64 320, i1 false), !annotation !0
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %entry
+  %x.addr.0 = phi i32 [ %x, %entry ], [ %dec, %do.cond ]
+  %arrayidx = getelementptr inbounds [80 x i32], ptr %buffer, i64 0, i64 0
+  call void @dump(ptr %arrayidx)
+  br label %do.cond
+
+do.cond:                                          ; preds = %do.body
+  %dec = add nsw i32 %x.addr.0, -1
+  %tobool = icmp ne i32 %x.addr.0, 0
+  br i1 %tobool, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.cond
+  ret void
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+declare void @dump(ptr )
+
+define void @bar(i32 %x, i32 %y) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BUFFER:%.*]] = alloca [80 x i32], align 16
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[Y:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[BUFFER]], i8 -86, i64 320, i1 false), !annotation !0
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[DEC:%.*]], [[DO_COND:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [80 x i32], ptr [[BUFFER]], i64 0, i64 0
+; CHECK-NEXT:    call void @dump(ptr [[ARRAYIDX]])
+; CHECK-NEXT:    br label [[DO_COND]]
+; CHECK:       do.cond:
+; CHECK-NEXT:    [[DEC]] = add nsw i32 [[X_ADDR_0]], -1
+; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[X_ADDR_0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL1]], label [[DO_BODY]], label [[DO_END:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+
+entry:
+  %buffer = alloca [80 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %buffer, i8 -86, i64 320, i1 false), !annotation !0
+  %tobool = icmp ne i32 %y, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %x, %y
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %if.then
+  %x.addr.0 = phi i32 [ %add, %if.then ], [ %dec, %do.cond ]
+  %arrayidx = getelementptr inbounds [80 x i32], ptr %buffer, i64 0, i64 0
+  call void @dump(ptr %arrayidx)
+  br label %do.cond
+
+do.cond:                                          ; preds = %do.body
+  %dec = add nsw i32 %x.addr.0, -1
+  %tobool1 = icmp ne i32 %x.addr.0, 0
+  br i1 %tobool1, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.cond
+  br label %if.end
+
+if.end:                                           ; preds = %do.end, %entry
+  ret void
+}
+
+!0 = !{!"auto-init"}

diff  --git a/llvm/test/Transforms/MoveAutoInit/scalar.ll b/llvm/test/Transforms/MoveAutoInit/scalar.ll
new file mode 100644
index 0000000000000..6929aebc58ebb
--- /dev/null
+++ b/llvm/test/Transforms/MoveAutoInit/scalar.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i32 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i32 -1431655766, ptr [[VAL]], align 4, !annotation !0
+; CHECK-NEXT:    call void @dump(ptr [[VAL]])
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+; Input IR: the annotated store to %val sits in entry, but %val is only used in if.then; the assertions above expect the store to end up in if.then, right before the call.
+entry:
+  %val = alloca i32, align 4
+  store i32 -1431655766, ptr %val, align 4, !annotation !0 ; scalar auto-init pattern store (!0 is the "auto-init" annotation)
+  %tobool = icmp ne i32 %x, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @dump(ptr %val) ; the only user of %val
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @dump(ptr)
+
+!0 = !{!"auto-init"}

diff  --git a/llvm/test/Transforms/MoveAutoInit/sret.ll b/llvm/test/Transforms/MoveAutoInit/sret.ll
new file mode 100644
index 0000000000000..204259e6fa121
--- /dev/null
+++ b/llvm/test/Transforms/MoveAutoInit/sret.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
+
+; Checks that auto-init memory instructions are not moved when writing to an sret argument.
+
+target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
+
+%struct.S = type { i64 }
+
+@pattern = private unnamed_addr constant %struct.S { i64 -1 }, align 4
+
+define void @f(ptr noalias sret(%struct.S) align 4 %0, i32 noundef %1) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP3:%.*]] = alloca ptr, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast ptr [[TMP0:%.*]] to ptr
+; CHECK-NEXT:    store ptr [[TMP6]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    store i32 [[TMP1:%.*]], ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast ptr [[TMP0]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 @pattern, i32 8, i1 false), !annotation !0
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 42
+; CHECK-NEXT:    br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP13:%.*]]
+; CHECK:       10:
+; CHECK-NEXT:    call void @g(ptr sret([[STRUCT_S]]) align 4 [[TMP5]])
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast ptr [[TMP0]] to ptr
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast ptr [[TMP5]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP12]], i32 8, i1 false)
+; CHECK-NEXT:    br label [[TMP13]]
+; CHECK:       13:
+; CHECK-NEXT:    ret void
+;
+  %3 = alloca ptr, align 4
+  %4 = alloca i32, align 4
+  %5 = alloca %struct.S, align 4
+  %6 = bitcast ptr %0 to ptr
+  store ptr %6, ptr %3, align 4
+  store i32 %1, ptr %4, align 4
+  %7 = bitcast ptr %0 to ptr ; %7 aliases the sret argument %0, not a local alloca
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %7, ptr align 4 @pattern, i32 8, i1 false), !annotation !0 ; auto-init of the sret slot; the assertions above expect it to stay in this block
+  %8 = load i32, ptr %4, align 4
+  %9 = icmp eq i32 %8, 42
+  br i1 %9, label %10, label %13
+
+10:                                               ; preds = %2
+  call void @g(ptr sret(%struct.S) align 4 %5) ; callee fills the local temporary %5
+  %11 = bitcast ptr %0 to ptr
+  %12 = bitcast ptr %5 to ptr
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %11, ptr align 4 %12, i32 8, i1 false) ; unannotated copy of %5 into the sret slot
+  br label %13
+
+13:                                               ; preds = %10, %2
+  ret void
+}
+
+declare void @g(ptr sret(%struct.S) align 4, ...)
+
+declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0
+
+!0 = !{!"auto-init"}
+

diff  --git a/llvm/test/Transforms/MoveAutoInit/volatile.ll b/llvm/test/Transforms/MoveAutoInit/volatile.ll
new file mode 100644
index 0000000000000..da3cd067ff4fe
--- /dev/null
+++ b/llvm/test/Transforms/MoveAutoInit/volatile.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
+
+; Make sure that volatile operations are not moved. This is overly conservative.
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i32 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store volatile i32 -1431655766, ptr [[VAL]], align 4, !annotation !0
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @dump(ptr [[VAL]])
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+; Input IR: same shape as a plain scalar auto-init whose only user is in if.then, except that the store is volatile; the assertions above expect the output to keep the store in entry.
+entry:
+  %val = alloca i32, align 4
+  store volatile i32 -1431655766, ptr %val, align 4, !annotation !0 ; volatile auto-init store, expected to stay in entry
+  %tobool = icmp ne i32 %x, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @dump(ptr %val) ; the only user of %val
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @dump(ptr)
+
+!0 = !{!"auto-init"}


        


More information about the llvm-commits mailing list