[clang] c472187 - Revert "[Clang][inlineasm] Add special support for "rm" output constraints (#92040)"
Nikita Popov via cfe-commits
cfe-commits at lists.llvm.org
Sat Feb 14 07:00:31 PST 2026
Author: Nikita Popov
Date: 2026-02-14T15:59:04+01:00
New Revision: c4721872af5605e10f06c256aae033ef15053525
URL: https://github.com/llvm/llvm-project/commit/c4721872af5605e10f06c256aae033ef15053525
DIFF: https://github.com/llvm/llvm-project/commit/c4721872af5605e10f06c256aae033ef15053525.diff
LOG: Revert "[Clang][inlineasm] Add special support for "rm" output constraints (#92040)"
This change landed without approval.
This reverts commit 45e666a8531c1148bdb170b9a120f99e1500c427.
This reverts commit a636dd4c37f12594275de2fe180ca35bc04d76ea.
Added:
llvm/include/llvm/CodeGen/CallBrPrepare.h
llvm/lib/CodeGen/CallBrPrepare.cpp
Modified:
clang/lib/CodeGen/CGStmt.cpp
clang/test/CodeGen/asm.c
llvm/include/llvm/CodeGen/Passes.h
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/include/llvm/IR/InlineAsm.h
llvm/include/llvm/InitializePasses.h
llvm/include/llvm/Passes/CodeGenPassBuilder.h
llvm/lib/CodeGen/CMakeLists.txt
llvm/lib/CodeGen/CodeGen.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/CodeGen/TargetPassConfig.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Passes/PassRegistry.def
llvm/test/CodeGen/AArch64/O0-pipeline.ll
llvm/test/CodeGen/AArch64/O3-pipeline.ll
llvm/test/CodeGen/AArch64/callbr-prepare.ll
llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
llvm/test/CodeGen/ARM/O3-pipeline.ll
llvm/test/CodeGen/LoongArch/O0-pipeline.ll
llvm/test/CodeGen/LoongArch/opt-pipeline.ll
llvm/test/CodeGen/PowerPC/O0-pipeline.ll
llvm/test/CodeGen/PowerPC/O3-pipeline.ll
llvm/test/CodeGen/RISCV/O0-pipeline.ll
llvm/test/CodeGen/RISCV/O3-pipeline.ll
llvm/test/CodeGen/SPIRV/llc-pipeline.ll
llvm/test/CodeGen/X86/O0-pipeline.ll
llvm/test/CodeGen/X86/inlineasm-sched-bug.ll
llvm/test/CodeGen/X86/llc-pipeline-npm.ll
llvm/test/CodeGen/X86/opt-pipeline.ll
llvm/tools/opt/optdriver.cpp
llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
Removed:
llvm/include/llvm/CodeGen/InlineAsmPrepare.h
llvm/lib/CodeGen/InlineAsmPrepare.cpp
llvm/test/CodeGen/X86/asm-constraints-rm.ll
llvm/test/CodeGen/X86/inline-asm-prepare-memory.ll
################################################################################
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 9d683f27a0141..0658ecc93d88d 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2892,21 +2892,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
if (!Constraints.empty())
Constraints += ',';
- // - If this is a register output, then make the inline asm return it
- // by-value.
- // - If this is an "rm" constraint, then treat it like a register output.
- // (We'll correct this before ISel if using the fast register allocator.)
- // - If this is a memory result, return the value by-reference.
+ // If this is a register output, then make the inline asm return it
+ // by-value. If this is a memory result, return the value by-reference.
QualType QTy = OutExpr->getType();
const bool IsScalarOrAggregate = hasScalarEvaluationKind(QTy) ||
hasAggregateEvaluationKind(QTy);
- // FIXME: Expand this to handle other constraints that include both 'r'
- // and 'm', such as "g" (which expands to "imr").
- const bool RegisterMemoryConstraints =
- OutputConstraint == "rm" || OutputConstraint == "mr";
+ if (!Info.allowsMemory() && IsScalarOrAggregate) {
- if (IsScalarOrAggregate &&
- (!Info.allowsMemory() || RegisterMemoryConstraints)) {
Constraints += "=" + OutputConstraint;
ResultRegQualTys.push_back(QTy);
ResultRegDests.push_back(Dest);
diff --git a/clang/test/CodeGen/asm.c b/clang/test/CodeGen/asm.c
index 66a7142ee7fca..9687c993e6464 100644
--- a/clang/test/CodeGen/asm.c
+++ b/clang/test/CodeGen/asm.c
@@ -259,7 +259,7 @@ void t31(int len) {
__asm__ volatile(""
: "+%%rm"(len), "+rm"(len));
// CHECK: @t31
- // CHECK: call i32 asm sideeffect "", "=*%rm,=rm,0,1,~{dirflag},~{fpsr},~{flags}"
+ // CHECK: call void asm sideeffect "", "=*%rm,=*rm,0,1,~{dirflag},~{fpsr},~{flags}"
}
// CHECK: @t32
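[For illustration, a hand-written IR sketch, not part of this commit, of the two forms the t31 test above distinguishes. With the reverted patch, Clang emitted the second "rm" output by value:

    %v = call i32 asm sideeffect "", "=rm,~{dirflag},~{fpsr},~{flags}"()

After the revert, that output is indirect again: the asm returns void and writes through a pointer argument annotated with elementtype, as in

    define void @rm_output(ptr %p) {
      call void asm sideeffect "", "=*rm,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i32) %p)
      ret void
    }
]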
diff --git a/llvm/include/llvm/CodeGen/CallBrPrepare.h b/llvm/include/llvm/CodeGen/CallBrPrepare.h
new file mode 100644
index 0000000000000..d44d30b0adc17
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/CallBrPrepare.h
@@ -0,0 +1,23 @@
+//===-- CallBrPrepare - Prepare callbr for code generation ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CALLBRPREPARE_H
+#define LLVM_CODEGEN_CALLBRPREPARE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class CallBrPreparePass : public PassInfoMixin<CallBrPreparePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CALLBRPREPARE_H
diff --git a/llvm/include/llvm/CodeGen/InlineAsmPrepare.h b/llvm/include/llvm/CodeGen/InlineAsmPrepare.h
deleted file mode 100644
index 130346084b428..0000000000000
--- a/llvm/include/llvm/CodeGen/InlineAsmPrepare.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- InlineAsmPrepare - Prepare inline asm for code gen ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_INLINEASMPREPARE_H
-#define LLVM_CODEGEN_INLINEASMPREPARE_H
-
-#include "llvm/IR/PassManager.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class TargetMachine;
-
-class InlineAsmPreparePass : public PassInfoMixin<InlineAsmPreparePass> {
- const TargetMachine *TM;
-
-public:
- explicit InlineAsmPreparePass(const TargetMachine &TM) : TM(&TM) {}
- LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
-
- static bool isRequired() { return true; }
-};
-
-} // namespace llvm
-
-#endif // LLVM_CODEGEN_INLINEASMPREPARE_H
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 0fa652fb7e3e5..2717110e1b3e7 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -622,15 +622,13 @@ LLVM_ABI ModulePass *createJMCInstrumenterPass();
/// This pass converts conditional moves to conditional jumps when profitable.
LLVM_ABI FunctionPass *createSelectOptimizePass();
+LLVM_ABI FunctionPass *createCallBrPass();
+
/// Creates Windows Secure Hot Patch pass. \see WindowsSecureHotPatching.cpp
LLVM_ABI ModulePass *createWindowsSecureHotPatchingPass();
/// Lowers KCFI operand bundles for indirect calls.
LLVM_ABI FunctionPass *createKCFIPass();
-
-/// Process inline assembly calls to prepare for code generation.
-LLVM_ABI FunctionPass *createInlineAsmPass();
-
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 0ed06fe7eb6fd..f6d5578412d1e 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5250,11 +5250,6 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// The ValueType for the operand value.
MVT ConstraintVT = MVT::Other;
- /// The register may be folded. This is used if the constraint is "rm",
- /// where we prefer using a register, but can fall back to a memory slot
- /// under register pressure.
- bool MayFoldRegister = false;
-
/// Copy constructor for copying from a ConstraintInfo.
AsmOperandInfo(InlineAsm::ConstraintInfo Info)
: InlineAsm::ConstraintInfo(std::move(Info)) {}
diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index 5f9e77b321708..fed0ccc5818f8 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -181,14 +181,6 @@ class InlineAsm final : public Value {
bool hasArg() const {
return Type == isInput || (Type == isOutput && isIndirect);
}
-
- /// hasRegMemConstraints - Returns true if and only if the constraint
- /// codes are "rm". This is useful when converting between a register form
- /// to a memory form.
- bool hasRegMemConstraints() const {
- return Codes.size() == 2 && is_contained(Codes, "r") &&
- is_contained(Codes, "m");
- }
};
/// ParseConstraints - Split up the constraint string into the specific
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index b46fabb14a04d..80ec83a58c3ac 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -76,6 +76,7 @@ LLVM_ABI void initializeCFGuardPass(PassRegistry &);
LLVM_ABI void initializeCFGuardLongjmpPass(PassRegistry &);
LLVM_ABI void initializeCFIFixupPass(PassRegistry &);
LLVM_ABI void initializeCFIInstrInserterPass(PassRegistry &);
+LLVM_ABI void initializeCallBrPreparePass(PassRegistry &);
LLVM_ABI void initializeCallGraphDOTPrinterPass(PassRegistry &);
LLVM_ABI void initializeCallGraphViewerPass(PassRegistry &);
LLVM_ABI void initializeCallGraphWrapperPassPass(PassRegistry &);
@@ -145,7 +146,6 @@ initializeImmutableModuleSummaryIndexWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeImplicitNullChecksPass(PassRegistry &);
LLVM_ABI void initializeIndirectBrExpandLegacyPassPass(PassRegistry &);
LLVM_ABI void initializeInferAddressSpacesPass(PassRegistry &);
-LLVM_ABI void initializeInlineAsmPreparePass(PassRegistry &);
LLVM_ABI void initializeInstSimplifyLegacyPassPass(PassRegistry &);
LLVM_ABI void initializeInstructionCombiningPassPass(PassRegistry &);
LLVM_ABI void initializeInstructionSelectPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a8752d6367a68..68d13fefd7d8b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -24,6 +24,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/BranchFoldingPass.h"
+#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/CodeGenPrepare.h"
#include "llvm/CodeGen/DeadMachineInstructionElim.h"
#include "llvm/CodeGen/DetectDeadLanes.h"
@@ -40,7 +41,6 @@
#include "llvm/CodeGen/GlobalMergeFunctions.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
#include "llvm/CodeGen/InitUndef.h"
-#include "llvm/CodeGen/InlineAsmPrepare.h"
#include "llvm/CodeGen/InterleavedAccess.h"
#include "llvm/CodeGen/InterleavedLoadCombine.h"
#include "llvm/CodeGen/LiveDebugValuesPass.h"
@@ -842,8 +842,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPrepare(
if (getOptLevel() != CodeGenOptLevel::None)
addFunctionPass(ObjCARCContractPass(), PMW);
- addFunctionPass(InlineAsmPreparePass(TM), PMW);
-
+ addFunctionPass(CallBrPreparePass(), PMW);
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
addFunctionPass(SafeStackPass(TM), PMW);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index bb1357214bc71..f26b2cb6fddf5 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_component_library(LLVMCodeGen
BasicBlockSectionsProfileReader.cpp
BasicBlockMatchingAndInference.cpp
CalcSpillWeights.cpp
+ CallBrPrepare.cpp
CallingConvLower.cpp
CFGuardLongjmp.cpp
CFIFixup.cpp
@@ -76,7 +77,6 @@ add_llvm_component_library(LLVMCodeGen
IfConversion.cpp
ImplicitNullChecks.cpp
IndirectBrExpandPass.cpp
- InlineAsmPrepare.cpp
InitUndef.cpp
InlineSpiller.cpp
InsertCodePrefetch.cpp
diff --git a/llvm/lib/CodeGen/CallBrPrepare.cpp b/llvm/lib/CodeGen/CallBrPrepare.cpp
new file mode 100644
index 0000000000000..77a0d0b653871
--- /dev/null
+++ b/llvm/lib/CodeGen/CallBrPrepare.cpp
@@ -0,0 +1,252 @@
+//===-- CallBrPrepare - Prepare callbr for code generation ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers callbrs in LLVM IR in order to assist SelectionDAG's
+// codegen.
+//
+// In particular, this pass assists in inserting register copies for the output
+// values of a callbr along the edges leading to the indirect target blocks.
+// Though the output SSA value is defined by the callbr instruction itself in
+// the IR representation, the value cannot be copied to the appropriate virtual
+// registers prior to jumping to an indirect label, since the jump occurs
+// within the user-provided assembly blob.
+//
+// Instead, those copies must occur separately at the beginning of each
+// indirect target. That requires that we create a separate SSA definition in
+// each of them (via llvm.callbr.landingpad), and may require splitting
+// critical edges so we have a location to place the intrinsic. Finally, we
+// remap users of the original callbr output SSA value to instead point to the
+// appropriate llvm.callbr.landingpad value.
+//
+// Ideally, this could be done inside SelectionDAG, or in the
+// MachineInstruction representation, without the use of an IR-level intrinsic.
+// But, within the current framework, it's simpler to implement as an IR pass.
+// (If support for callbr in GlobalISel is implemented, it's worth considering
+// whether this is still required.)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallBrPrepare.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "callbr-prepare"
+
+static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
+static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT);
+static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate);
+static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F);
+
+namespace {
+
+class CallBrPrepare : public FunctionPass {
+public:
+ CallBrPrepare() : FunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+ static char ID;
+};
+
+} // end anonymous namespace
+
+PreservedAnalyses CallBrPreparePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ bool Changed = false;
+ SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F);
+
+ if (CBRs.empty())
+ return PreservedAnalyses::all();
+
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+
+ Changed |= SplitCriticalEdges(CBRs, DT);
+ Changed |= InsertIntrinsicCalls(CBRs, DT);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+char CallBrPrepare::ID = 0;
+INITIALIZE_PASS_BEGIN(CallBrPrepare, "callbrprepare", "Prepare callbr", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(CallBrPrepare, "callbrprepare", "Prepare callbr", false,
+ false)
+
+FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); }
+
+void CallBrPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+SmallVector<CallBrInst *, 2> FindCallBrs(Function &F) {
+ SmallVector<CallBrInst *, 2> CBRs;
+ for (BasicBlock &BB : F)
+ if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
+ if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
+ CBRs.push_back(CBR);
+ return CBRs;
+}
+
+bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
+ bool Changed = false;
+ CriticalEdgeSplittingOptions Options(&DT);
+ Options.setMergeIdenticalEdges();
+
+ // The indirect destination might be duplicated between another parameter...
+ // %0 = callbr ... [label %x, label %x]
+  // ...hence MergeIdenticalEdges and AllowIdenticalEdges, but we don't need
+ // to split the default destination if it's duplicated between an indirect
+ // destination...
+ // %1 = callbr ... to label %x [label %x]
+ // ...hence starting at 1 and checking against successor 0 (aka the default
+ // destination).
+ for (CallBrInst *CBR : CBRs)
+ for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
+ if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
+ isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
+ if (SplitKnownCriticalEdge(CBR, i, Options))
+ Changed = true;
+ return Changed;
+}
+
+bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
+ bool Changed = false;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ IRBuilder<> Builder(CBRs[0]->getContext());
+ for (CallBrInst *CBR : CBRs) {
+ if (!CBR->getNumIndirectDests())
+ continue;
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(CBR->getType(), CBR->getName());
+ SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
+ SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+
+ for (BasicBlock *IndDest : CBR->getIndirectDests()) {
+ if (!Visited.insert(IndDest).second)
+ continue;
+ Builder.SetInsertPoint(&*IndDest->begin());
+ CallInst *Intrinsic = Builder.CreateIntrinsic(
+ CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
+ SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
+ UpdateSSA(DT, CBR, Intrinsic, SSAUpdate);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+static bool IsInSameBasicBlock(const Use &U, const BasicBlock *BB) {
+ const auto *I = dyn_cast<Instruction>(U.getUser());
+ return I && I->getParent() == BB;
+}
+
+#ifndef NDEBUG
+static void PrintDebugDomInfo(const DominatorTree &DT, const Use &U,
+ const BasicBlock *BB, bool IsDefaultDest) {
+ if (!isa<Instruction>(U.getUser()))
+ return;
+ LLVM_DEBUG(dbgs() << "Use: " << *U.getUser() << ", in block "
+ << cast<Instruction>(U.getUser())->getParent()->getName()
+ << ", is " << (DT.dominates(BB, U) ? "" : "NOT ")
+ << "dominated by " << BB->getName() << " ("
+ << (IsDefaultDest ? "in" : "") << "direct)\n");
+}
+#endif
+
+void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate) {
+
+ SmallPtrSet<Use *, 4> Visited;
+ BasicBlock *DefaultDest = CBR->getDefaultDest();
+ BasicBlock *LandingPad = Intrinsic->getParent();
+
+ SmallVector<Use *, 4> Uses(make_pointer_range(CBR->uses()));
+ for (Use *U : Uses) {
+ if (!Visited.insert(U).second)
+ continue;
+
+#ifndef NDEBUG
+ PrintDebugDomInfo(DT, *U, LandingPad, /*IsDefaultDest*/ false);
+ PrintDebugDomInfo(DT, *U, DefaultDest, /*IsDefaultDest*/ true);
+#endif
+
+ // Don't rewrite the use in the newly inserted intrinsic.
+ if (const auto *II = dyn_cast<IntrinsicInst>(U->getUser()))
+ if (II->getIntrinsicID() == Intrinsic::callbr_landingpad)
+ continue;
+
+ // If the Use is in the same BasicBlock as the Intrinsic call, replace
+ // the Use with the value of the Intrinsic call.
+ if (IsInSameBasicBlock(*U, LandingPad)) {
+ U->set(Intrinsic);
+ continue;
+ }
+
+ // If the Use is dominated by the default dest, do not touch it.
+ if (DT.dominates(DefaultDest, *U))
+ continue;
+
+ SSAUpdate.RewriteUse(*U);
+ }
+}
+
+bool CallBrPrepare::runOnFunction(Function &F) {
+ bool Changed = false;
+ SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F);
+
+ if (CBRs.empty())
+ return Changed;
+
+ // It's highly likely that most programs do not contain CallBrInsts. Follow a
+ // similar pattern from SafeStackLegacyPass::runOnFunction to reuse previous
+ // domtree analysis if available, otherwise compute it lazily. This avoids
+ // forcing Dominator Tree Construction at -O0 for programs that likely do not
+ // contain CallBrInsts. It does pessimize programs with callbr at higher
+ // optimization levels, as the DominatorTree created here is not reused by
+ // subsequent passes.
+ DominatorTree *DT;
+ std::optional<DominatorTree> LazilyComputedDomTree;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ else {
+ LazilyComputedDomTree.emplace(F);
+ DT = &*LazilyComputedDomTree;
+ }
+
+ if (SplitCriticalEdges(CBRs, *DT))
+ Changed = true;
+
+ if (InsertIntrinsicCalls(CBRs, *DT))
+ Changed = true;
+
+ return Changed;
+}
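[As a rough sketch of the transform described in the CallBrPrepare.cpp header comment above, hand-written and not taken from this commit: the pass gives each indirect destination its own SSA definition via llvm.callbr.landingpad and rewrites uses there to read it:

    declare i32 @llvm.callbr.landingpad.i32(i32)

    define i32 @sketch() {
    entry:
      %out = callbr i32 asm "", "=r,!i"()
              to label %direct [label %indirect]
    direct:
      ret i32 %out
    indirect:
      ; inserted by the pass; uses in this block now read %lp, not %out
      %lp = call i32 @llvm.callbr.landingpad.i32(i32 %out)
      ret i32 %lp
    }

With the registration restored in PassRegistry.def below, this is exercised as opt -passes=callbr-prepare, as in the updated AArch64 test.]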
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 8eafb97215883..fec9a3db20142 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -27,6 +27,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchFolderLegacyPass(Registry);
initializeBranchRelaxationLegacyPass(Registry);
initializeBreakFalseDepsPass(Registry);
+ initializeCallBrPreparePass(Registry);
initializeCFGuardLongjmpPass(Registry);
initializeCFIFixupPass(Registry);
initializeCFIInstrInserterPass(Registry);
@@ -62,7 +63,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeIndirectBrExpandLegacyPassPass(Registry);
initializeInsertCodePrefetchPass(Registry);
initializeInitUndefLegacyPass(Registry);
- initializeInlineAsmPreparePass(Registry);
initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeJMCInstrumenterPass(Registry);
diff --git a/llvm/lib/CodeGen/InlineAsmPrepare.cpp b/llvm/lib/CodeGen/InlineAsmPrepare.cpp
deleted file mode 100644
index 30cbb23d4537f..0000000000000
--- a/llvm/lib/CodeGen/InlineAsmPrepare.cpp
+++ /dev/null
@@ -1,619 +0,0 @@
-//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass lowers callbrs and inline asm in LLVM IR in order to assist
-// SelectionDAG's codegen.
-//
-// CallBrInst:
-//
-// - Assists in inserting register copies for the output values of a callbr
-// along the edges leading to the indirect target blocks. Though the output
-// SSA value is defined by the callbr instruction itself in the IR
-// representation, the value cannot be copied to the appropriate virtual
-// registers prior to jumping to an indirect label, since the jump occurs
-// within the user-provided assembly blob.
-//
-// Instead, those copies must occur separately at the beginning of each
-// indirect target. That requires that we create a separate SSA definition
-// in each of them (via llvm.callbr.landingpad), and may require splitting
-// critical edges so we have a location to place the intrinsic. Finally, we
-// remap users of the original callbr output SSA value to instead point to
-// the appropriate llvm.callbr.landingpad value.
-//
-// Ideally, this could be done inside SelectionDAG, or in the
-// MachineInstruction representation, without the use of an IR-level
-//   intrinsic. But, within the current framework, it's simpler to implement
-//   as an IR pass. (If support for callbr in GlobalISel is implemented,
-//   it's worth considering whether this is still required.)
-//
-// InlineAsm:
-//
-// - Prepares inline assembly for code generation with the fast register
-// allocator. In particular, it defaults "rm" (register-or-memory) to
-// prefer the "m" constraints (the front-end opts for the "r" constraint),
-// simplifying register allocation by forcing operands to memory locations.
-// The other register allocators are equipped to handle folding registers
-// already, so don't need to change the default.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/InlineAsmPrepare.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "inline-asm-prepare"
-
-namespace {
-
-class InlineAsmPrepare : public FunctionPass {
-public:
- InlineAsmPrepare() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetPassConfig>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
- bool runOnFunction(Function &F) override;
-
- static char ID;
-};
-
-char InlineAsmPrepare::ID = 0;
-
-} // end anonymous namespace
-
-INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
- false, false)
-
-FunctionPass *llvm::createInlineAsmPass() { return new InlineAsmPrepare(); }
-
-//===----------------------------------------------------------------------===//
-// Process InlineAsm instructions
-//===----------------------------------------------------------------------===//
-
-/// The inline asm constraint allows both register and memory.
-static bool IsRegMemConstraint(StringRef Constraint) {
- return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr");
-}
-
-/// Tag "rm" output constraints with '*' to signify that they default to a
-/// memory location.
-static std::pair<std::string, bool>
-ConvertConstraintsToMemory(StringRef ConstraintStr) {
- auto I = ConstraintStr.begin(), E = ConstraintStr.end();
- std::string Out;
- raw_string_ostream O(Out);
- bool HasRegMem = false;
-
- while (I != E) {
- bool IsOutput = false;
- bool HasIndirect = false;
- if (*I == '=') {
- O << *I;
- IsOutput = true;
- ++I;
- if (I == E)
- return {};
- }
- if (*I == '*') {
- O << '*';
- HasIndirect = true;
- ++I;
- if (I == E)
- return {};
- }
- if (*I == '+') {
- O << '+';
- IsOutput = true;
- ++I;
- if (I == E)
- return {};
- }
-
- auto Comma = std::find(I, E, ',');
- std::string Sub(I, Comma);
- if (IsRegMemConstraint(Sub)) {
- HasRegMem = true;
- if (IsOutput && !HasIndirect)
- O << '*';
- }
-
- O << Sub;
-
- if (Comma == E)
- break;
-
- O << ',';
- I = Comma + 1;
- }
-
- return {Out, HasRegMem};
-}
-
-/// Build a map of tied constraints. TiedOutput[i] = j means Constraint i is an
-/// input tied to output constraint j.
-static void
-BuildTiedConstraintMap(const InlineAsm::ConstraintInfoVector &Constraints,
- SmallVectorImpl<int> &TiedOutput) {
- for (unsigned I = 0, E = Constraints.size(); I != E; ++I) {
- const InlineAsm::ConstraintInfo &C = Constraints[I];
- if (C.Type == InlineAsm::isOutput && C.hasMatchingInput()) {
- int InputIdx = C.MatchingInput;
- if (InputIdx >= 0 && InputIdx < (int)Constraints.size())
- TiedOutput[InputIdx] = I;
- }
-
- if (C.Type == InlineAsm::isInput && C.hasMatchingInput()) {
- int OutputIdx = C.MatchingInput;
- if (OutputIdx >= 0 && OutputIdx < (int)Constraints.size())
- TiedOutput[I] = OutputIdx;
- }
- }
-}
-
-/// Process an output constraint, creating allocas for converted constraints.
-static void ProcessOutputConstraint(
- const InlineAsm::ConstraintInfo &C, Type *RetTy, unsigned OutputIdx,
- IRBuilder<> &EntryBuilder, SmallVectorImpl<Value *> &NewArgs,
- SmallVectorImpl<Type *> &NewArgTypes, SmallVectorImpl<Type *> &NewRetTypes,
- SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs,
- SmallVectorImpl<AllocaInst *> &OutputAllocas, unsigned ConstraintIdx) {
- Type *SlotTy = RetTy;
- if (StructType *ST = dyn_cast<StructType>(RetTy))
- SlotTy = ST->getElementType(OutputIdx);
-
- if (C.hasRegMemConstraints()) {
- // Converted to memory constraint. Create alloca and pass pointer as
- // argument.
- AllocaInst *Slot = EntryBuilder.CreateAlloca(SlotTy, nullptr, "asm_mem");
- NewArgs.push_back(Slot);
- NewArgTypes.push_back(Slot->getType());
- ElementTypeAttrs.push_back({NewArgs.size() - 1, SlotTy});
- OutputAllocas[ConstraintIdx] = Slot;
- // No return value for this output since it's now an out-parameter.
- } else {
- // Unchanged, still an output return value.
- NewRetTypes.push_back(SlotTy);
- }
-}
-
-/// Process an input constraint, handling tied constraints and conversions.
-static void ProcessInputConstraint(const InlineAsm::ConstraintInfo &C,
- Value *ArgVal, ArrayRef<int> TiedOutput,
- ArrayRef<AllocaInst *> OutputAllocas,
- unsigned ConstraintIdx, IRBuilder<> &Builder,
- IRBuilder<> &EntryBuilder,
- SmallVectorImpl<Value *> &NewArgs,
- SmallVectorImpl<Type *> &NewArgTypes) {
- Type *ArgTy = ArgVal->getType();
-
- if (TiedOutput[ConstraintIdx] != -1) {
- int MatchIdx = TiedOutput[ConstraintIdx];
- if (AllocaInst *Slot = OutputAllocas[MatchIdx]) {
- // The matched output was converted to memory. Store this input into the
- // alloca.
- Builder.CreateStore(ArgVal, Slot);
-
- // Pass the alloca pointer as the argument, instead of ArgVal. This
- // ensures the tied "0" constraint matches the "*m" output.
- NewArgs.push_back(Slot);
- NewArgTypes.push_back(Slot->getType());
- return;
- }
- }
-
- if (C.hasRegMemConstraints()) {
- // Converted to memory constraint. Create alloca, store input, pass pointer
- // as argument.
- AllocaInst *Slot = EntryBuilder.CreateAlloca(ArgTy, nullptr, "asm_mem");
- Builder.CreateStore(ArgVal, Slot);
- NewArgs.push_back(Slot);
- NewArgTypes.push_back(Slot->getType());
- } else {
- // Unchanged
- NewArgs.push_back(ArgVal);
- NewArgTypes.push_back(ArgTy);
- }
-}
-
-/// Build the return type from the collected return types.
-static Type *BuildReturnType(ArrayRef<Type *> NewRetTypes,
- LLVMContext &Context) {
- if (NewRetTypes.empty())
- return Type::getVoidTy(Context);
-
- if (NewRetTypes.size() == 1)
- return NewRetTypes[0];
-
- return StructType::get(Context, NewRetTypes);
-}
-
-/// Create the new inline assembly call with converted constraints.
-static CallInst *CreateNewInlineAsm(
- InlineAsm *IA, const std::string &NewConstraintStr, Type *NewRetTy,
- const SmallVectorImpl<Type *> &NewArgTypes,
- const SmallVectorImpl<Value *> &NewArgs,
- const SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs,
- CallBase *CB, IRBuilder<> &Builder, LLVMContext &Context) {
- FunctionType *NewFTy = FunctionType::get(NewRetTy, NewArgTypes, false);
- InlineAsm *NewIA = InlineAsm::get(
- NewFTy, IA->getAsmString(), NewConstraintStr, IA->hasSideEffects(),
- IA->isAlignStack(), IA->getDialect(), IA->canThrow());
-
- CallInst *NewCall = Builder.CreateCall(NewFTy, NewIA, NewArgs);
- NewCall->setCallingConv(CB->getCallingConv());
- NewCall->setAttributes(CB->getAttributes());
- NewCall->setDebugLoc(CB->getDebugLoc());
-
- for (const std::pair<unsigned, Type *> &Item : ElementTypeAttrs)
- NewCall->addParamAttr(
- Item.first,
- Attribute::get(Context, Attribute::ElementType, Item.second));
-
- return NewCall;
-}
-
-/// Reconstruct the return value from the new call and allocas.
-static Value *
-ReconstructReturnValue(Type *RetTy, CallInst *NewCall,
- const InlineAsm::ConstraintInfoVector &Constraints,
- const SmallVectorImpl<AllocaInst *> &OutputAllocas,
- const SmallVectorImpl<Type *> &NewRetTypes,
- IRBuilder<> &Builder) {
- if (RetTy->isVoidTy())
- return nullptr;
-
- if (isa<StructType>(RetTy)) {
- // Multiple outputs. Reconstruct the struct.
- Value *Res = PoisonValue::get(RetTy);
- unsigned NewRetIdx = 0;
- unsigned OriginalOutIdx = 0;
-
- for (unsigned I = 0, E = Constraints.size(); I != E; ++I) {
- if (Constraints[I].Type != InlineAsm::isOutput)
- continue;
-
- Value *Val = nullptr;
- if (AllocaInst *Slot = OutputAllocas[I]) {
- // Converted to memory. Load from alloca.
- Val = Builder.CreateLoad(Slot->getAllocatedType(), Slot);
- } else {
- // Not converted. Extract from NewCall return.
- if (NewRetTypes.size() == 1) {
- Val = NewCall;
- } else {
- Val = Builder.CreateExtractValue(NewCall, NewRetIdx);
- }
- NewRetIdx++;
- }
-
- Res = Builder.CreateInsertValue(Res, Val, OriginalOutIdx++);
- }
-
- return Res;
- }
-
- // Single output.
- // Find the output constraint (should be the first one).
- unsigned OutConstraintIdx = 0;
- for (unsigned I = 0; I < Constraints.size(); ++I) {
- if (Constraints[I].Type == InlineAsm::isOutput) {
- OutConstraintIdx = I;
- break;
- }
- }
-
- if (AllocaInst *Slot = OutputAllocas[OutConstraintIdx])
- return Builder.CreateLoad(Slot->getAllocatedType(), Slot);
-
- return NewCall;
-}
-
-static bool ProcessInlineAsm(Function &F, CallBase *CB) {
- InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
- const InlineAsm::ConstraintInfoVector &Constraints = IA->ParseConstraints();
-
- auto [NewConstraintStr, HasRegMem] =
- ConvertConstraintsToMemory(IA->getConstraintString());
- if (!HasRegMem)
- return false;
-
- IRBuilder<> Builder(CB);
- IRBuilder<> EntryBuilder(&F.getEntryBlock(), F.getEntryBlock().begin());
-
- // Collect new arguments and return types.
- SmallVector<Value *, 8> NewArgs;
- SmallVector<Type *, 8> NewArgTypes;
- SmallVector<Type *, 2> NewRetTypes;
- SmallVector<std::pair<unsigned, Type *>, 8> ElementTypeAttrs;
-
- // Track allocas created for converted outputs. Indexed by position in the
- // flat Constraints list (not by output index), so that both
- // ProcessOutputConstraint and ReconstructReturnValue can look up entries
- // using the same constraint index.
- SmallVector<AllocaInst *, 8> OutputAllocas(Constraints.size(), nullptr);
-
- // Build tied constraint map.
- SmallVector<int, 8> TiedOutput(Constraints.size(), -1);
- BuildTiedConstraintMap(Constraints, TiedOutput);
-
- // Process constraints.
- unsigned ArgNo = 0;
- unsigned OutputIdx = 0;
- for (unsigned I = 0, E = Constraints.size(); I != E; ++I) {
- const InlineAsm::ConstraintInfo &C = Constraints[I];
-
- if (C.Type == InlineAsm::isOutput) {
- if (C.isIndirect) {
- // Indirect output takes a pointer argument from the original call.
- // Pass it through to the new call.
- Value *ArgVal = CB->getArgOperand(ArgNo);
- NewArgs.push_back(ArgVal);
- NewArgTypes.push_back(ArgVal->getType());
- // Preserve element type attribute if present.
- if (auto *Ty = CB->getParamElementType(ArgNo))
- ElementTypeAttrs.push_back({NewArgs.size() - 1, Ty});
- ArgNo++;
- } else {
- ProcessOutputConstraint(C, CB->getType(), OutputIdx, EntryBuilder,
- NewArgs, NewArgTypes, NewRetTypes,
- ElementTypeAttrs, OutputAllocas, I);
- OutputIdx++;
- }
- } else if (C.Type == InlineAsm::isInput) {
- Value *ArgVal = CB->getArgOperand(ArgNo);
- ProcessInputConstraint(C, ArgVal, TiedOutput, OutputAllocas, I, Builder,
- EntryBuilder, NewArgs, NewArgTypes);
- ArgNo++;
- }
- }
-
- // Build the new return type.
- Type *NewRetTy = BuildReturnType(NewRetTypes, F.getContext());
-
- // Create the new inline assembly call.
- CallInst *NewCall =
- CreateNewInlineAsm(IA, NewConstraintStr, NewRetTy, NewArgTypes, NewArgs,
- ElementTypeAttrs, CB, Builder, F.getContext());
-
- // Reconstruct the return value and update users.
- if (!CB->use_empty()) {
- if (Value *Replacement =
- ReconstructReturnValue(CB->getType(), NewCall, Constraints,
- OutputAllocas, NewRetTypes, Builder))
- CB->replaceAllUsesWith(Replacement);
- }
-
- CB->eraseFromParent();
- return true;
-}
-
-//===----------------------------------------------------------------------===//
-// Process CallBrInsts
-//===----------------------------------------------------------------------===//
-
-/// The Use is in the same BasicBlock as the intrinsic call.
-static bool IsInSameBasicBlock(const Use &U, const BasicBlock *BB) {
- const auto *I = dyn_cast<Instruction>(U.getUser());
- return I && I->getParent() == BB;
-}
-
-#ifndef NDEBUG
-static void PrintDebugDomInfo(const DominatorTree &DT, const Use &U,
- const BasicBlock *BB, bool IsDefaultDest) {
- if (isa<Instruction>(U.getUser()))
- LLVM_DEBUG(dbgs() << "Use: " << *U.getUser() << ", in block "
- << cast<Instruction>(U.getUser())->getParent()->getName()
- << ", is " << (DT.dominates(BB, U) ? "" : "NOT ")
- << "dominated by " << BB->getName() << " ("
- << (IsDefaultDest ? "in" : "") << "direct)\n");
-}
-#endif
-
-static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
- SSAUpdater &SSAUpdate) {
- SmallPtrSet<Use *, 4> Visited;
-
- BasicBlock *DefaultDest = CBR->getDefaultDest();
- BasicBlock *LandingPad = Intrinsic->getParent();
- SmallVector<Use *, 4> Uses(make_pointer_range(CBR->uses()));
-
- for (Use *U : Uses) {
- if (!Visited.insert(U).second)
- continue;
-
-#ifndef NDEBUG
- PrintDebugDomInfo(DT, *U, LandingPad, /*IsDefaultDest*/ false);
- PrintDebugDomInfo(DT, *U, DefaultDest, /*IsDefaultDest*/ true);
-#endif
-
- // Don't rewrite the use in the newly inserted intrinsic.
- if (const auto *II = dyn_cast<IntrinsicInst>(U->getUser()))
- if (II->getIntrinsicID() == Intrinsic::callbr_landingpad)
- continue;
-
- // If the Use is in the same BasicBlock as the Intrinsic call, replace
- // the Use with the value of the Intrinsic call.
- if (IsInSameBasicBlock(*U, LandingPad)) {
- U->set(Intrinsic);
- continue;
- }
-
- // If the Use is dominated by the default dest, do not touch it.
- if (DT.dominates(DefaultDest, *U))
- continue;
-
- SSAUpdate.RewriteUse(*U);
- }
-}
-
-static bool SplitCriticalEdges(CallBrInst *CBR, DominatorTree *DT) {
- bool Changed = false;
-
- CriticalEdgeSplittingOptions Options(DT);
- Options.setMergeIdenticalEdges();
-
- // The indirect destination might be duplicated between another parameter...
- //
- // %0 = callbr ... [label %x, label %x]
- //
-  // ...hence MergeIdenticalEdges and AllowIdenticalEdges, but we don't need
- // to split the default destination if it's duplicated between an indirect
- // destination...
- //
- // %1 = callbr ... to label %x [label %x]
- //
- // ...hence starting at 1 and checking against successor 0 (aka the default
- // destination).
- for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
- if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
- isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
- if (SplitKnownCriticalEdge(CBR, i, Options))
- Changed = true;
-
- return Changed;
-}
-
-static bool InsertIntrinsicCalls(CallBrInst *CBR, DominatorTree &DT) {
- bool Changed = false;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- IRBuilder<> Builder(CBR->getContext());
-
- if (!CBR->getNumIndirectDests())
- return false;
-
- SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(CBR->getType(), CBR->getName());
- SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
- SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
-
- for (BasicBlock *IndDest : CBR->getIndirectDests()) {
- if (!Visited.insert(IndDest).second)
- continue;
-
- Builder.SetInsertPoint(&*IndDest->begin());
- CallInst *Intrinsic = Builder.CreateIntrinsic(
- CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
- SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
- UpdateSSA(DT, CBR, Intrinsic, SSAUpdate);
- Changed = true;
- }
-
- return Changed;
-}
-
-static bool ProcessCallBrInst(Function &F, CallBrInst *CBR, DominatorTree *DT) {
- bool Changed = false;
-
- Changed |= SplitCriticalEdges(CBR, DT);
- Changed |= InsertIntrinsicCalls(CBR, *DT);
-
- return Changed;
-}
-
-static bool runImpl(Function &F, ArrayRef<CallBase *> IAs, DominatorTree *DT) {
- bool Changed = false;
-
- for (CallBase *CB : IAs)
- if (auto *CBR = dyn_cast<CallBrInst>(CB))
- Changed |= ProcessCallBrInst(F, CBR, DT);
- else
- Changed |= ProcessInlineAsm(F, CB);
-
- return Changed;
-}
-
-/// Find all inline assembly calls that need preparation. This always collects
-/// CallBrInsts (which need SSA fixups), and at -O0 also collects regular
-/// inline asm calls (which need "rm" to "m" constraint conversion for the fast
-/// register allocator).
-static SmallVector<CallBase *, 4>
-FindInlineAsmCandidates(Function &F, const TargetMachine *TM) {
- bool isOptLevelNone = TM->getOptLevel() == CodeGenOptLevel::None;
- SmallVector<CallBase *, 4> InlineAsms;
-
- for (BasicBlock &BB : F) {
- if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator())) {
- if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
- InlineAsms.push_back(CBR);
- continue;
- }
-
- if (isOptLevelNone)
- // Only inline assembly compiled at '-O0' (i.e. uses the fast register
- // allocator) needs to be processed.
- for (Instruction &I : BB)
- if (CallBase *CB = dyn_cast<CallBase>(&I); CB && CB->isInlineAsm())
- InlineAsms.push_back(CB);
- }
-
- return InlineAsms;
-}
-
-bool InlineAsmPrepare::runOnFunction(Function &F) {
- const auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- SmallVector<CallBase *, 4> IAs = FindInlineAsmCandidates(F, TM);
- if (IAs.empty())
- return false;
-
- // It's highly likely that most programs do not contain CallBrInsts. Follow a
- // similar pattern from SafeStackLegacyPass::runOnFunction to reuse previous
- // domtree analysis if available, otherwise compute it lazily. This avoids
- // forcing Dominator Tree Construction at -O0 for programs that likely do not
- // contain CallBrInsts. It does pessimize programs with callbr at higher
- // optimization levels, as the DominatorTree created here is not reused by
- // subsequent passes.
- DominatorTree *DT;
- std::optional<DominatorTree> LazilyComputedDomTree;
- if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DT = &DTWP->getDomTree();
- else {
- LazilyComputedDomTree.emplace(F);
- DT = &*LazilyComputedDomTree;
- }
-
- return runImpl(F, IAs, DT);
-}
-
-PreservedAnalyses InlineAsmPreparePass::run(Function &F,
- FunctionAnalysisManager &FAM) {
- SmallVector<CallBase *, 4> IAs = FindInlineAsmCandidates(F, TM);
- if (IAs.empty())
- return PreservedAnalyses::all();
-
- DominatorTree *DT = &FAM.getResult<DominatorTreeAnalysis>(F);
-
- if (runImpl(F, IAs, DT)) {
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- return PA;
- }
-
- return PreservedAnalyses::all();
-}
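[For context, a hand-written sketch, not from this commit, of the -O0 rewrite the removed pass performed via ConvertConstraintsToMemory: a by-value "rm" output such as

    %v = call i32 asm "", "=rm,~{dirflag},~{fpsr},~{flags}"()

was demoted to an indirect "*rm" operand backed by a stack slot, so the fast register allocator only ever saw a memory operand:

    define i32 @rewritten() {
    entry:
      %asm_mem = alloca i32
      call void asm "", "=*rm,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i32) %asm_mem)
      %v = load i32, ptr %asm_mem
      ret i32 %v
    }
]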
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 62a2ac693d3bd..7c762ed6d91ce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1032,8 +1032,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
}
void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
- unsigned MatchingIdx,
- bool MayFoldRegister, const SDLoc &dl,
+ unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -1049,9 +1048,7 @@ void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
// from the def.
const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
-
Flag.setRegClass(RC->getID());
- Flag.setRegMayBeFolded(MayFoldRegister);
}
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
@@ -10335,8 +10332,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
AsmNodeOperands.push_back(OpInfo.CallOperand);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass, and a target-defined fashion for C_Immediate /
- // C_Other). Find a register that we can use.
+ // C_RegisterClass, and a target-defined fashion for
+ // C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
Call, "couldn't allocate output register for constraint '" +
@@ -10352,8 +10349,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.AssignedRegs.AddInlineAsmOperands(
OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber
: InlineAsm::Kind::RegDef,
- false, 0, OpInfo.MayFoldRegister, getCurSDLoc(), DAG,
- AsmNodeOperands);
+ false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
break;
@@ -10395,9 +10391,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call);
- MatchedRegs.AddInlineAsmOperands(
- InlineAsm::Kind::RegUse, true, OpInfo.getMatchedOperand(),
- OpInfo.MayFoldRegister, dl, DAG, AsmNodeOperands);
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, true,
+ OpInfo.getMatchedOperand(), dl, DAG,
+ AsmNodeOperands);
break;
}
@@ -10529,8 +10525,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
&Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, false,
- 0, OpInfo.MayFoldRegister, dl,
- DAG, AsmNodeOperands);
+ 0, dl, DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber:
@@ -10538,8 +10533,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::Clobber,
- false, 0, false, getCurSDLoc(),
- DAG, AsmNodeOperands);
+ false, 0, getCurSDLoc(), DAG,
+ AsmNodeOperands);
break;
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 845d06f829730..f8aecea25b3d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -822,9 +822,8 @@ struct RegsForValue {
/// code marker, matching input operand index (if applicable), and includes
/// the number of values added into it.
void AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
- unsigned MatchingIdx, bool MayFoldRegister,
- const SDLoc &dl, SelectionDAG &DAG,
- std::vector<SDValue> &Ops) const;
+ unsigned MatchingIdx, const SDLoc &dl,
+ SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
/// Check if the total RegCount is greater than one.
bool occupiesMultipleRegs() const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 26bf95daa8289..e4b4d80896fa7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5989,16 +5989,6 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
OpInfo.ConstraintVT = MVT::Other;
- // Special treatment for all platforms that can fold a register into a
- // spill. This is used for the "rm" constraint, where we would vastly
- // prefer to use 'r' over 'm'. The non-fast register allocators are able to
- // handle the 'r' default by folding. The fast register allocator needs
- // special handling to convert the instruction to use 'm' instead.
- if (!OpInfo.hasMatchingInput() && OpInfo.Codes.size() == 2 &&
- llvm::is_contained(OpInfo.Codes, "r") &&
- llvm::is_contained(OpInfo.Codes, "m"))
- OpInfo.MayFoldRegister = true;
-
// Compute the value type for each operand.
switch (OpInfo.Type) {
case InlineAsm::isOutput: {
@@ -6279,12 +6269,7 @@ TargetLowering::ConstraintWeight
/// 1) If there is an 'other' constraint, and if the operand is valid for
/// that constraint, use it. This makes us take advantage of 'i'
/// constraints when available.
-/// 2) Special processing is done for the "rm" constraint. If specified, we
-/// opt for the 'r' constraint, but mark the operand as being "foldable."
-/// In the face of register exhaustion, the register allocator is free to
-/// choose to use a stack slot. The fast register allocator is handled
-/// separately via the InlineAsmPrepare pass.
-/// 3) Otherwise, pick the most general constraint present. This prefers
+/// 2) Otherwise, pick the most general constraint present. This prefers
/// 'm' over 'r', for example.
///
TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
@@ -6292,20 +6277,6 @@ TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
ConstraintGroup Ret;
Ret.reserve(OpInfo.Codes.size());
-
- // If we can fold the register (i.e. it has an "rm" constraint), opt for the
- // 'r' constraint, and allow the register allocator to spill if need be.
- //
-  // Note: This code is a holdover from when the Clang front-end defaulted to
-  // using the memory constraint. This should be reviewed at some point to
-  // remove that assumption from the back-end.
- const TargetMachine &TM = getTargetMachine();
- if (TM.getOptLevel() != CodeGenOptLevel::None && OpInfo.MayFoldRegister) {
- Ret.emplace_back(ConstraintPair("r", getConstraintType("r")));
- Ret.emplace_back(ConstraintPair("m", getConstraintType("m")));
- return Ret;
- }
-
for (StringRef Code : OpInfo.Codes) {
TargetLowering::ConstraintType CType = getConstraintType(Code);
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 7e49693dd7cc0..17536319a69e7 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -985,7 +985,7 @@ void TargetPassConfig::addISelPrepare() {
if (getOptLevel() != CodeGenOptLevel::None)
addPass(createObjCARCContractPass());
- addPass(createInlineAsmPass());
+ addPass(createCallBrPass());
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 61f653fe30b20..45955426d66a0 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -83,6 +83,7 @@
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/BranchFoldingPass.h"
#include "llvm/CodeGen/BranchRelaxation.h"
+#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/CodeGenPrepare.h"
#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
#include "llvm/CodeGen/DeadMachineInstructionElim.h"
@@ -105,7 +106,6 @@
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
#include "llvm/CodeGen/InitUndef.h"
-#include "llvm/CodeGen/InlineAsmPrepare.h"
#include "llvm/CodeGen/InterleavedAccess.h"
#include "llvm/CodeGen/InterleavedLoadCombine.h"
#include "llvm/CodeGen/JMCInstrumenter.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index a837d3aa5d354..2cfb5b2592601 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -411,6 +411,7 @@ FUNCTION_PASS("assume-simplify", AssumeSimplifyPass())
FUNCTION_PASS("atomic-expand", AtomicExpandPass(*TM))
FUNCTION_PASS("bdce", BDCEPass())
FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
+FUNCTION_PASS("callbr-prepare", CallBrPreparePass())
FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
FUNCTION_PASS("chr", ControlHeightReductionPass())
FUNCTION_PASS("codegenprepare", CodeGenPreparePass(*TM))
@@ -449,7 +450,6 @@ FUNCTION_PASS("helloworld", HelloWorldPass())
FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass(*TM))
FUNCTION_PASS("infer-address-spaces", InferAddressSpacesPass())
FUNCTION_PASS("infer-alignment", InferAlignmentPass())
-FUNCTION_PASS("inline-asm-prepare", InlineAsmPreparePass(*TM))
FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
FUNCTION_PASS("instcount", InstCountPass())
FUNCTION_PASS("instnamer", InstructionNamerPass())
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index 9f9e47865c1b8..cc0655b31d892 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -33,7 +33,7 @@
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: AArch64 Stack Tagging
; CHECK-NEXT: Exception handling preparation
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 620041253ecfc..472f1f616c600 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -110,7 +110,7 @@
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: ObjC ARC contraction
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/AArch64/callbr-prepare.ll b/llvm/test/CodeGen/AArch64/callbr-prepare.ll
index c7c976f373efd..826e27d92720f 100644
--- a/llvm/test/CodeGen/AArch64/callbr-prepare.ll
+++ b/llvm/test/CodeGen/AArch64/callbr-prepare.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -mtriple=aarch64-linux-gnu -inline-asm-prepare -S -o - | FileCheck %s
-; RUN: opt %s -mtriple=aarch64-linux-gnu -passes=inline-asm-prepare -S -o - | FileCheck %s
+; RUN: opt %s -callbrprepare -S -o - | FileCheck %s
+; RUN: opt %s -passes=callbr-prepare -S -o - | FileCheck %s
define i32 @test0() {
; CHECK-LABEL: @test0(
@@ -407,7 +407,7 @@ foo:
}
; Test the result of the callbr having multiple uses to avoid iterator
-; invalidation bugs in InlineAsmPrepare::UpdateSSA.
+; invalidation bugs in CallBrPrepare::UpdateSSA.
define i32 @multiple_split() {
; CHECK-LABEL: @multiple_split(
; CHECK-NEXT: entry:
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index b1d9d618302a8..525ab3757e6e0 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -47,7 +47,7 @@
; GCN-O0-NEXT: amdgpu-rewrite-undef-for-phi
; GCN-O0-NEXT: lcssa
; GCN-O0-NEXT: require<uniformity>
-; GCN-O0-NEXT: inline-asm-prepare
+; GCN-O0-NEXT: callbr-prepare
; GCN-O0-NEXT: safe-stack
; GCN-O0-NEXT: stack-protector
; GCN-O0-NEXT: verify))
@@ -161,7 +161,7 @@
; GCN-O2-NEXT: amdgpu-perf-hint
; GCN-O2-NEXT: cgscc(function(require<uniformity>
; GCN-O2-NEXT: objc-arc-contract
-; GCN-O2-NEXT: inline-asm-prepare
+; GCN-O2-NEXT: callbr-prepare
; GCN-O2-NEXT: safe-stack
; GCN-O2-NEXT: stack-protector
; GCN-O2-NEXT: verify))
@@ -333,7 +333,7 @@
; GCN-O3-NEXT: amdgpu-perf-hint
; GCN-O3-NEXT: cgscc(function(require<uniformity>
; GCN-O3-NEXT: objc-arc-contract
-; GCN-O3-NEXT: inline-asm-prepare
+; GCN-O3-NEXT: callbr-prepare
; GCN-O3-NEXT: safe-stack
; GCN-O3-NEXT: stack-protector
; GCN-O3-NEXT: verify))
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 2904ba604fb1b..584b3b4d9874c 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -95,7 +95,7 @@
; GCN-O0-NEXT: Call Graph SCC Pass Manager
; GCN-O0-NEXT: DummyCGSCCPass
; GCN-O0-NEXT: FunctionPass Manager
-; GCN-O0-NEXT: Prepare inline asm insts
+; GCN-O0-NEXT: Prepare callbr
; GCN-O0-NEXT: Safe Stack instrumentation pass
; GCN-O0-NEXT: Insert stack protectors
; GCN-O0-NEXT: Dominator Tree Construction
@@ -302,7 +302,7 @@
; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O1-NEXT: Function Alias Analysis Results
; GCN-O1-NEXT: ObjC ARC contraction
-; GCN-O1-NEXT: Prepare inline asm insts
+; GCN-O1-NEXT: Prepare callbr
; GCN-O1-NEXT: Safe Stack instrumentation pass
; GCN-O1-NEXT: Insert stack protectors
; GCN-O1-NEXT: Cycle Info Analysis
@@ -615,7 +615,7 @@
; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O1-OPTS-NEXT: Function Alias Analysis Results
; GCN-O1-OPTS-NEXT: ObjC ARC contraction
-; GCN-O1-OPTS-NEXT: Prepare inline asm insts
+; GCN-O1-OPTS-NEXT: Prepare callbr
; GCN-O1-OPTS-NEXT: Safe Stack instrumentation pass
; GCN-O1-OPTS-NEXT: Insert stack protectors
; GCN-O1-OPTS-NEXT: Cycle Info Analysis
@@ -939,7 +939,7 @@
; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O2-NEXT: Function Alias Analysis Results
; GCN-O2-NEXT: ObjC ARC contraction
-; GCN-O2-NEXT: Prepare inline asm insts
+; GCN-O2-NEXT: Prepare callbr
; GCN-O2-NEXT: Safe Stack instrumentation pass
; GCN-O2-NEXT: Insert stack protectors
; GCN-O2-NEXT: Cycle Info Analysis
@@ -1277,7 +1277,7 @@
; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O3-NEXT: Function Alias Analysis Results
; GCN-O3-NEXT: ObjC ARC contraction
-; GCN-O3-NEXT: Prepare inline asm insts
+; GCN-O3-NEXT: Prepare callbr
; GCN-O3-NEXT: Safe Stack instrumentation pass
; GCN-O3-NEXT: Insert stack protectors
; GCN-O3-NEXT: Cycle Info Analysis
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 98bb87524db44..5801d3fe55c59 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -69,7 +69,7 @@
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: ObjC ARC contraction
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
index bf519342fa4cc..ad7eee3f975f6 100644
--- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
@@ -31,7 +31,7 @@
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Exception handling preparation
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 262ee06c6f732..dd7bb2b6d19ba 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -76,7 +76,7 @@
; LAXX-NEXT: Basic Alias Analysis (stateless AA impl)
; LAXX-NEXT: Function Alias Analysis Results
; LAXX-NEXT: ObjC ARC contraction
-; LAXX-NEXT: Prepare inline asm insts
+; LAXX-NEXT: Prepare callbr
; LAXX-NEXT: Safe Stack instrumentation pass
; LAXX-NEXT: Insert stack protectors
; LAXX-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
index b0ba623edfb0a..d586328c5062e 100644
--- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
@@ -30,7 +30,7 @@
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Exception handling preparation
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index f771b5728e5b5..31c0d8558f96a 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -85,7 +85,7 @@
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: ObjC ARC contraction
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 847a8bd96c6d6..c3e0ed9b85ec7 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -32,7 +32,7 @@
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Exception handling preparation
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 2b9558e0c69e3..c48e3859850cc 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -93,7 +93,7 @@
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: ObjC ARC contraction
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
index eb1128ac5417a..a4fcd9e6d9223 100644
--- a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
+++ b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
@@ -47,7 +47,7 @@
; SPIRV-O0-NEXT: SPIRV emit intrinsics
; SPIRV-O0-NEXT: FunctionPass Manager
; SPIRV-O0-NEXT: SPIRV legalize bitcast pass
-; SPIRV-O0-NEXT: Prepare inline asm insts
+; SPIRV-O0-NEXT: Prepare callbr
; SPIRV-O0-NEXT: Safe Stack instrumentation pass
; SPIRV-O0-NEXT: Insert stack protectors
; SPIRV-O0-NEXT: Analysis containing CSE Info
@@ -160,7 +160,7 @@
; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
; SPIRV-Opt-NEXT: Function Alias Analysis Results
; SPIRV-Opt-NEXT: ObjC ARC contraction
-; SPIRV-Opt-NEXT: Prepare inline asm insts
+; SPIRV-Opt-NEXT: Prepare callbr
; SPIRV-Opt-NEXT: Safe Stack instrumentation pass
; SPIRV-Opt-NEXT: Insert stack protectors
; SPIRV-Opt-NEXT: Analysis containing CSE Info
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index e8a3084563573..673b36968bdeb 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -32,7 +32,7 @@
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Expand indirectbr instructions
; CHECK-NEXT: Exception handling preparation
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/X86/asm-constraints-rm.ll b/llvm/test/CodeGen/X86/asm-constraints-rm.ll
deleted file mode 100644
index 086b430bccac8..0000000000000
--- a/llvm/test/CodeGen/X86/asm-constraints-rm.ll
+++ /dev/null
@@ -1,1307 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter "^\t(mov|call|#)" --version 4
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O2 < %s | FileCheck --check-prefix=O2 %s
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O0 < %s | FileCheck --check-prefix=O0 %s
-
-; The non-fast register allocators should use registers when there isn't
-; register pressure.
-
-define dso_local i32 @test1(ptr noundef readonly captures(none) %foo) local_unnamed_addr {
-; O2-LABEL: test1:
-; O2: movl (%rdi), %eax
-; O2: movl 4(%rdi), %ecx
-; O2: movl 8(%rdi), %edx
-; O2: movl 12(%rdi), %esi
-; O2: movl 16(%rdi), %r8d
-; O2: #APP
-; O2: # rm input: no pressure
-; O2: # %eax %ecx %edx %esi %r8d
-; O2: #NO_APP
-; O2: movl (%rdi), %eax
-;
-; O0-LABEL: test1:
-; O0: movl (%rdi), %r8d
-; O0: movl 4(%rdi), %esi
-; O0: movl 8(%rdi), %edx
-; O0: movl 12(%rdi), %ecx
-; O0: movl 16(%rdi), %eax
-; O0: movl %r8d, -{{[0-9]+}}(%rsp)
-; O0: movl %esi, -{{[0-9]+}}(%rsp)
-; O0: movl %edx, -{{[0-9]+}}(%rsp)
-; O0: movl %ecx, -{{[0-9]+}}(%rsp)
-; O0: movl %eax, -{{[0-9]+}}(%rsp)
-; O0: movq %rax, -{{[0-9]+}}(%rsp)
-; O0: movq %rax, -{{[0-9]+}}(%rsp)
-; O0: movq %rax, -{{[0-9]+}}(%rsp)
-; O0: movq %rax, -{{[0-9]+}}(%rsp)
-; O0: movq %rax, -{{[0-9]+}}(%rsp)
-; O0: #APP
-; O0: # rm input: no pressure
-; O0: # -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movl (%rdi), %eax
-entry:
- %0 = load i32, ptr %foo, align 4
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %1 = load i32, ptr %b, align 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %2 = load i32, ptr %c, align 4
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %3 = load i32, ptr %d, align 4
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %4 = load i32, ptr %e, align 4
- tail call void asm sideeffect "# rm input: no pressure\0A\09# $0 $1 $2 $3 $4", "rm,rm,rm,rm,rm,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
- %5 = load i32, ptr %foo, align 4
- ret i32 %5
-}
-
-define dso_local i32 @test2(ptr noundef readonly captures(none) %foo) local_unnamed_addr {
-; O2-LABEL: test2:
-; O2: movq %rdi, (%rsp) # 8-byte Spill
-; O2: #APP
-; O2: movq $0, %rax
-; O2: movq $1, %rcx
-; O2: movq $2, %rdx
-; O2: movq $3, %rsi
-; O2: movq $4, %rdi
-; O2: movq $5, %rbx
-; O2: movq $6, %rbp
-; O2: movq $7, %r8
-; O2: movq $8, %r9
-; O2: movq $9, %r10
-; O2: movq $10, %r11
-; O2: movq $11, %r12
-; O2: movq $12, %r13
-; O2: movq $13, %r14
-; O2: movq $14, %r15
-; O2: #NO_APP
-; O2: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r9, %rbp
-; O2: movq (%rsp), %rbx # 8-byte Reload
-; O2: movl (%rbx), %esi
-; O2: movl 4(%rbx), %edi
-; O2: movl 8(%rbx), %r8d
-; O2: movl 12(%rbx), %r9d
-; O2: movl 16(%rbx), %eax
-; O2: #APP
-; O2: # rm input: pressure
-; O2: # %esi %edi %r8d %r9d %eax
-; O2: #NO_APP
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O2: movq %rcx, %rsi
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O2: callq g@PLT
-; O2: movl (%rbx), %eax
-;
-; O0-LABEL: test2:
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: #APP
-; O0: movq $0, %rax
-; O0: movq $1, %rcx
-; O0: movq $2, %rdx
-; O0: movq $3, %rsi
-; O0: movq $4, %rdi
-; O0: movq $5, %rbx
-; O0: movq $6, %rbp
-; O0: movq $7, %r8
-; O0: movq $8, %r9
-; O0: movq $9, %r10
-; O0: movq $10, %r11
-; O0: movq $11, %r12
-; O0: movq $12, %r13
-; O0: movq $13, %r14
-; O0: movq $14, %r15
-; O0: #NO_APP
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rsi, %rcx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; O0: movq %rdi, %rax
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %r8, %rbx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O0: movq %r9, %rax
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movl (%rax), %eax
-; O0: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movl 4(%rax), %eax
-; O0: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movl 8(%rax), %eax
-; O0: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movl 12(%rax), %eax
-; O0: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movl 16(%rax), %eax
-; O0: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: #APP
-; O0: # rm input: pressure
-; O0: # {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movq %rbp, (%rsp)
-; O0: movq %rbx, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %r10, {{[0-9]+}}(%rsp)
-; O0: movq %r11, {{[0-9]+}}(%rsp)
-; O0: movq %r12, {{[0-9]+}}(%rsp)
-; O0: movq %r13, {{[0-9]+}}(%rsp)
-; O0: movq %r14, {{[0-9]+}}(%rsp)
-; O0: movq %r15, {{[0-9]+}}(%rsp)
-; O0: callq g@PLT
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl (%rdi), %eax
-entry:
- %0 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "mov $$0, $0\0A\09mov $$1, $1\0A\09mov $$2, $2\0A\09mov $$3, $3\0A\09mov $$4, $4\0A\09mov $$5, $5\0A\09mov $$6, $6\0A\09mov $$7, $7\0A\09mov $$8, $8\0A\09mov $$9, $9\0A\09mov $$10, $10\0A\09mov $$11, $11\0A\09mov $$12, $12\0A\09mov $$13, $13\0A\09mov $$14, $14", "={rax},={rcx},={rdx},={rsi},={rdi},={rbx},={rbp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
- %asmresult = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 0
- %asmresult1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 1
- %asmresult2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 2
- %asmresult3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 3
- %asmresult4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 4
- %asmresult5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 5
- %asmresult6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 6
- %asmresult7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 7
- %asmresult8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 8
- %asmresult9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 9
- %asmresult10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 10
- %asmresult11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 11
- %asmresult12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 12
- %asmresult13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 13
- %asmresult14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 14
- %1 = load i32, ptr %foo, align 4
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %2 = load i32, ptr %b, align 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %3 = load i32, ptr %c, align 4
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %4 = load i32, ptr %d, align 4
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %5 = load i32, ptr %e, align 4
- tail call void asm sideeffect "# rm input: pressure\0A\09# $0 $1 $2 $3 $4", "rm,rm,rm,rm,rm,~{dirflag},~{fpsr},~{flags}"(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5)
- tail call void @g(i64 noundef %asmresult, i64 noundef %asmresult1, i64 noundef %asmresult2, i64 noundef %asmresult3, i64 noundef %asmresult4, i64 noundef %asmresult5, i64 noundef %asmresult6, i64 noundef %asmresult7, i64 noundef %asmresult8, i64 noundef %asmresult9, i64 noundef %asmresult10, i64 noundef %asmresult11, i64 noundef %asmresult12, i64 noundef %asmresult13, i64 noundef %asmresult14)
- %6 = load i32, ptr %foo, align 4
- ret i32 %6
-}
-
-define dso_local i32 @test3(ptr noundef writeonly captures(none) initializes((0, 20)) %foo) local_unnamed_addr {
-; O2-LABEL: test3:
-; O2: #APP
-; O2: # rm output: no pressure
-; O2: # %eax %ecx %edx %esi %r8d
-; O2: #NO_APP
-; O2: movl %eax, (%rdi)
-; O2: movl %ecx, 4(%rdi)
-; O2: movl %edx, 8(%rdi)
-; O2: movl %esi, 12(%rdi)
-; O2: movl %r8d, 16(%rdi)
-;
-; O0-LABEL: test3:
-; O0: #APP
-; O0: # rm output: no pressure
-; O0: # -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movl -{{[0-9]+}}(%rsp), %eax
-; O0: movl -{{[0-9]+}}(%rsp), %r8d
-; O0: movl -{{[0-9]+}}(%rsp), %esi
-; O0: movl -{{[0-9]+}}(%rsp), %edx
-; O0: movl -{{[0-9]+}}(%rsp), %ecx
-; O0: movl %eax, (%rdi)
-; O0: movl %r8d, 4(%rdi)
-; O0: movl %esi, 8(%rdi)
-; O0: movl %edx, 12(%rdi)
-; O0: movl %ecx, 16(%rdi)
-entry:
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %0 = tail call { i32, i32, i32, i32, i32 } asm sideeffect "# rm output: no pressure\0A\09# $0 $1 $2 $3 $4", "=rm,=rm,=rm,=rm,=rm,~{dirflag},~{fpsr},~{flags}"()
- %asmresult = extractvalue { i32, i32, i32, i32, i32 } %0, 0
- %asmresult1 = extractvalue { i32, i32, i32, i32, i32 } %0, 1
- %asmresult2 = extractvalue { i32, i32, i32, i32, i32 } %0, 2
- %asmresult3 = extractvalue { i32, i32, i32, i32, i32 } %0, 3
- %asmresult4 = extractvalue { i32, i32, i32, i32, i32 } %0, 4
- store i32 %asmresult, ptr %foo, align 4
- store i32 %asmresult1, ptr %b, align 4
- store i32 %asmresult2, ptr %c, align 4
- store i32 %asmresult3, ptr %d, align 4
- store i32 %asmresult4, ptr %e, align 4
- ret i32 %asmresult
-}
-
-define dso_local i32 @test4(ptr noundef writeonly captures(none) initializes((0, 20)) %foo) local_unnamed_addr {
-; O2-LABEL: test4:
-; O2: movq %rdi, (%rsp) # 8-byte Spill
-; O2: #APP
-; O2: movq $0, %rax
-; O2: movq $1, %rcx
-; O2: movq $2, %rdx
-; O2: movq $3, %rsi
-; O2: movq $4, %rdi
-; O2: movq $5, %rbx
-; O2: movq $6, %rbp
-; O2: movq $7, %r8
-; O2: movq $8, %r9
-; O2: movq $9, %r10
-; O2: movq $10, %r11
-; O2: movq $11, %r12
-; O2: movq $12, %r13
-; O2: movq $13, %r14
-; O2: movq $14, %r15
-; O2: #NO_APP
-; O2: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r9, %rbp
-; O2: #APP
-; O2: # rm output: pressure
-; O2: # %esi %edi %r8d %r9d %eax
-; O2: #NO_APP
-; O2: movq (%rsp), %rbx # 8-byte Reload
-; O2: movl %esi, (%rbx)
-; O2: movl %edi, 4(%rbx)
-; O2: movl %r8d, 8(%rbx)
-; O2: movl %r9d, 12(%rbx)
-; O2: movl %eax, 16(%rbx)
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O2: movq %rcx, %rsi
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O2: callq g@PLT
-; O2: movl (%rbx), %eax
-;
-; O0-LABEL: test4:
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: #APP
-; O0: movq $0, %rax
-; O0: movq $1, %rcx
-; O0: movq $2, %rdx
-; O0: movq $3, %rsi
-; O0: movq $4, %rdi
-; O0: movq $5, %rbx
-; O0: movq $6, %rbp
-; O0: movq $7, %r8
-; O0: movq $8, %r9
-; O0: movq $9, %r10
-; O0: movq $10, %r11
-; O0: movq $11, %r12
-; O0: movq $12, %r13
-; O0: movq $13, %r14
-; O0: movq $14, %r15
-; O0: #NO_APP
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rsi, %rcx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; O0: movq %rdi, %rax
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %r8, %rbx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O0: movq %r9, %rax
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: #APP
-; O0: # rm output: pressure
-; O0: # {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movl {{[0-9]+}}(%rsp), %eax
-; O0: movl {{[0-9]+}}(%rsp), %edi
-; O0: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movl {{[0-9]+}}(%rsp), %edi
-; O0: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movl {{[0-9]+}}(%rsp), %edi
-; O0: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movl {{[0-9]+}}(%rsp), %edi
-; O0: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl %eax, (%rdi)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, 4(%rdi)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, 8(%rdi)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, 12(%rdi)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, 16(%rdi)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rbp, (%rsp)
-; O0: movq %rbx, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %r10, {{[0-9]+}}(%rsp)
-; O0: movq %r11, {{[0-9]+}}(%rsp)
-; O0: movq %r12, {{[0-9]+}}(%rsp)
-; O0: movq %r13, {{[0-9]+}}(%rsp)
-; O0: movq %r14, {{[0-9]+}}(%rsp)
-; O0: movq %r15, {{[0-9]+}}(%rsp)
-; O0: callq g@PLT
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl (%rdi), %eax
-entry:
- %0 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "mov $$0, $0\0A\09mov $$1, $1\0A\09mov $$2, $2\0A\09mov $$3, $3\0A\09mov $$4, $4\0A\09mov $$5, $5\0A\09mov $$6, $6\0A\09mov $$7, $7\0A\09mov $$8, $8\0A\09mov $$9, $9\0A\09mov $$10, $10\0A\09mov $$11, $11\0A\09mov $$12, $12\0A\09mov $$13, $13\0A\09mov $$14, $14", "={rax},={rcx},={rdx},={rsi},={rdi},={rbx},={rbp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
- %asmresult = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 0
- %asmresult1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 1
- %asmresult2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 2
- %asmresult3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 3
- %asmresult4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 4
- %asmresult5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 5
- %asmresult6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 6
- %asmresult7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 7
- %asmresult8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 8
- %asmresult9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 9
- %asmresult10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 10
- %asmresult11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 11
- %asmresult12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 12
- %asmresult13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 13
- %asmresult14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 14
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %1 = tail call { i32, i32, i32, i32, i32 } asm sideeffect "# rm output: pressure\0A\09# $0 $1 $2 $3 $4", "=rm,=rm,=rm,=rm,=rm,~{dirflag},~{fpsr},~{flags}"()
- %asmresult15 = extractvalue { i32, i32, i32, i32, i32 } %1, 0
- %asmresult16 = extractvalue { i32, i32, i32, i32, i32 } %1, 1
- %asmresult17 = extractvalue { i32, i32, i32, i32, i32 } %1, 2
- %asmresult18 = extractvalue { i32, i32, i32, i32, i32 } %1, 3
- %asmresult19 = extractvalue { i32, i32, i32, i32, i32 } %1, 4
- store i32 %asmresult15, ptr %foo, align 4
- store i32 %asmresult16, ptr %b, align 4
- store i32 %asmresult17, ptr %c, align 4
- store i32 %asmresult18, ptr %d, align 4
- store i32 %asmresult19, ptr %e, align 4
- tail call void @g(i64 noundef %asmresult, i64 noundef %asmresult1, i64 noundef %asmresult2, i64 noundef %asmresult3, i64 noundef %asmresult4, i64 noundef %asmresult5, i64 noundef %asmresult6, i64 noundef %asmresult7, i64 noundef %asmresult8, i64 noundef %asmresult9, i64 noundef %asmresult10, i64 noundef %asmresult11, i64 noundef %asmresult12, i64 noundef %asmresult13, i64 noundef %asmresult14)
- %2 = load i32, ptr %foo, align 4
- ret i32 %2
-}
-
-define dso_local i32 @test5(ptr noundef captures(none) %foo) local_unnamed_addr {
-; O2-LABEL: test5:
-; O2: movl (%rdi), %eax
-; O2: movl 4(%rdi), %ecx
-; O2: movl 8(%rdi), %edx
-; O2: movl 12(%rdi), %esi
-; O2: movl 16(%rdi), %r8d
-; O2: #APP
-; O2: # rm tied output: no pressure
-; O2: # %eax %ecx %edx %esi %r8d
-; O2: #NO_APP
-; O2: movl %eax, (%rdi)
-; O2: movl %ecx, 4(%rdi)
-; O2: movl %edx, 8(%rdi)
-; O2: movl %esi, 12(%rdi)
-; O2: movl %r8d, 16(%rdi)
-;
-; O0-LABEL: test5:
-; O0: movl (%rdi), %r8d
-; O0: movl 4(%rdi), %esi
-; O0: movl 8(%rdi), %edx
-; O0: movl 12(%rdi), %ecx
-; O0: movl 16(%rdi), %eax
-; O0: movl %r8d, -{{[0-9]+}}(%rsp)
-; O0: movl %esi, -{{[0-9]+}}(%rsp)
-; O0: movl %edx, -{{[0-9]+}}(%rsp)
-; O0: movl %ecx, -{{[0-9]+}}(%rsp)
-; O0: movl %eax, -{{[0-9]+}}(%rsp)
-; O0: #APP
-; O0: # rm tied output: no pressure
-; O0: # %eax %ecx %edx %esi %r8d
-; O0: #NO_APP
-; O0: movl %r8d, -{{[0-9]+}}(%rsp)
-; O0: movl %esi, -{{[0-9]+}}(%rsp)
-; O0: movl %edx, -{{[0-9]+}}(%rsp)
-; O0: movl %ecx, -{{[0-9]+}}(%rsp)
-; O0: movl %eax, -{{[0-9]+}}(%rsp)
-; O0: movl -{{[0-9]+}}(%rsp), %eax
-; O0: movl -{{[0-9]+}}(%rsp), %r8d
-; O0: movl -{{[0-9]+}}(%rsp), %esi
-; O0: movl -{{[0-9]+}}(%rsp), %edx
-; O0: movl -{{[0-9]+}}(%rsp), %ecx
-; O0: movl %eax, (%rdi)
-; O0: movl %r8d, 4(%rdi)
-; O0: movl %esi, 8(%rdi)
-; O0: movl %edx, 12(%rdi)
-; O0: movl %ecx, 16(%rdi)
-entry:
- %0 = load i32, ptr %foo, align 4
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %1 = load i32, ptr %b, align 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %2 = load i32, ptr %c, align 4
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %3 = load i32, ptr %d, align 4
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %4 = load i32, ptr %e, align 4
- %5 = tail call { i32, i32, i32, i32, i32 } asm sideeffect "# rm tied output: no pressure\0A\09# $0 $1 $2 $3 $4", "=rm,=rm,=rm,=rm,=rm,0,1,2,3,4,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
- %asmresult = extractvalue { i32, i32, i32, i32, i32 } %5, 0
- %asmresult1 = extractvalue { i32, i32, i32, i32, i32 } %5, 1
- %asmresult2 = extractvalue { i32, i32, i32, i32, i32 } %5, 2
- %asmresult3 = extractvalue { i32, i32, i32, i32, i32 } %5, 3
- %asmresult4 = extractvalue { i32, i32, i32, i32, i32 } %5, 4
- store i32 %asmresult, ptr %foo, align 4
- store i32 %asmresult1, ptr %b, align 4
- store i32 %asmresult2, ptr %c, align 4
- store i32 %asmresult3, ptr %d, align 4
- store i32 %asmresult4, ptr %e, align 4
- ret i32 %asmresult
-}
-
-define dso_local i32 @test6(ptr noundef captures(none) %foo) local_unnamed_addr {
-; O2-LABEL: test6:
-; O2: movq %rdi, (%rsp) # 8-byte Spill
-; O2: #APP
-; O2: movq $0, %rax
-; O2: movq $1, %rcx
-; O2: movq $2, %rdx
-; O2: movq $3, %rsi
-; O2: movq $4, %rdi
-; O2: movq $5, %rbx
-; O2: movq $6, %rbp
-; O2: movq $7, %r8
-; O2: movq $8, %r9
-; O2: movq $9, %r10
-; O2: movq $10, %r11
-; O2: movq $11, %r12
-; O2: movq $12, %r13
-; O2: movq $13, %r14
-; O2: movq $14, %r15
-; O2: #NO_APP
-; O2: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r9, %rbp
-; O2: movq (%rsp), %rbx # 8-byte Reload
-; O2: movl (%rbx), %esi
-; O2: movl 4(%rbx), %edi
-; O2: movl 8(%rbx), %r8d
-; O2: movl 12(%rbx), %r9d
-; O2: movl 16(%rbx), %eax
-; O2: #APP
-; O2: # rm tied output: pressure
-; O2: # %esi %edi %r8d %r9d %eax
-; O2: #NO_APP
-; O2: movl %esi, (%rbx)
-; O2: movl %edi, 4(%rbx)
-; O2: movl %r8d, 8(%rbx)
-; O2: movl %r9d, 12(%rbx)
-; O2: movl %eax, 16(%rbx)
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O2: movq %rcx, %rsi
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O2: callq g@PLT
-; O2: movl (%rbx), %eax
-;
-; O0-LABEL: test6:
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: #APP
-; O0: movq $0, %rax
-; O0: movq $1, %rcx
-; O0: movq $2, %rdx
-; O0: movq $3, %rsi
-; O0: movq $4, %rdi
-; O0: movq $5, %rbx
-; O0: movq $6, %rbp
-; O0: movq $7, %r8
-; O0: movq $8, %r9
-; O0: movq $9, %r10
-; O0: movq $10, %r11
-; O0: movq $11, %r12
-; O0: movq $12, %r13
-; O0: movq $13, %r14
-; O0: movq $14, %r15
-; O0: #NO_APP
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rdi, %rcx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %r8, %rbx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O0: movq %r9, %rcx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movl (%rax), %edi
-; O0: movl 4(%rax), %esi
-; O0: movl 8(%rax), %edx
-; O0: movl 12(%rax), %ecx
-; O0: movl 16(%rax), %eax
-; O0: movl %edi, {{[0-9]+}}(%rsp)
-; O0: movl %esi, {{[0-9]+}}(%rsp)
-; O0: movl %edx, {{[0-9]+}}(%rsp)
-; O0: movl %ecx, {{[0-9]+}}(%rsp)
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl %ecx, %edi
-; O0: #APP
-; O0: # rm tied output: pressure
-; O0: # %eax %edi %ecx %edx %esi
-; O0: #NO_APP
-; O0: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; O0: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; O0: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O0: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %edi, {{[0-9]+}}(%rsp)
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movl {{[0-9]+}}(%rsp), %eax
-; O0: movl {{[0-9]+}}(%rsp), %edi
-; O0: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movl {{[0-9]+}}(%rsp), %edi
-; O0: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movl {{[0-9]+}}(%rsp), %edi
-; O0: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movl {{[0-9]+}}(%rsp), %edi
-; O0: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl %eax, (%rdi)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, 4(%rdi)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, 8(%rdi)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, 12(%rdi)
-; O0: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
-; O0: movl %eax, 16(%rdi)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rbp, (%rsp)
-; O0: movq %rbx, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %r10, {{[0-9]+}}(%rsp)
-; O0: movq %r11, {{[0-9]+}}(%rsp)
-; O0: movq %r12, {{[0-9]+}}(%rsp)
-; O0: movq %r13, {{[0-9]+}}(%rsp)
-; O0: movq %r14, {{[0-9]+}}(%rsp)
-; O0: movq %r15, {{[0-9]+}}(%rsp)
-; O0: callq g@PLT
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl (%rdi), %eax
-entry:
- %0 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "mov $$0, $0\0A\09mov $$1, $1\0A\09mov $$2, $2\0A\09mov $$3, $3\0A\09mov $$4, $4\0A\09mov $$5, $5\0A\09mov $$6, $6\0A\09mov $$7, $7\0A\09mov $$8, $8\0A\09mov $$9, $9\0A\09mov $$10, $10\0A\09mov $$11, $11\0A\09mov $$12, $12\0A\09mov $$13, $13\0A\09mov $$14, $14", "={rax},={rcx},={rdx},={rsi},={rdi},={rbx},={rbp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
- %asmresult = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 0
- %asmresult1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 1
- %asmresult2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 2
- %asmresult3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 3
- %asmresult4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 4
- %asmresult5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 5
- %asmresult6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 6
- %asmresult7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 7
- %asmresult8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 8
- %asmresult9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 9
- %asmresult10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 10
- %asmresult11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 11
- %asmresult12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 12
- %asmresult13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 13
- %asmresult14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 14
- %1 = load i32, ptr %foo, align 4
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %2 = load i32, ptr %b, align 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %3 = load i32, ptr %c, align 4
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %4 = load i32, ptr %d, align 4
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %5 = load i32, ptr %e, align 4
- %6 = tail call { i32, i32, i32, i32, i32 } asm sideeffect "# rm tied output: pressure\0A\09# $0 $1 $2 $3 $4", "=rm,=rm,=rm,=rm,=rm,0,1,2,3,4,~{dirflag},~{fpsr},~{flags}"(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5)
- %asmresult15 = extractvalue { i32, i32, i32, i32, i32 } %6, 0
- %asmresult16 = extractvalue { i32, i32, i32, i32, i32 } %6, 1
- %asmresult17 = extractvalue { i32, i32, i32, i32, i32 } %6, 2
- %asmresult18 = extractvalue { i32, i32, i32, i32, i32 } %6, 3
- %asmresult19 = extractvalue { i32, i32, i32, i32, i32 } %6, 4
- store i32 %asmresult15, ptr %foo, align 4
- store i32 %asmresult16, ptr %b, align 4
- store i32 %asmresult17, ptr %c, align 4
- store i32 %asmresult18, ptr %d, align 4
- store i32 %asmresult19, ptr %e, align 4
- tail call void @g(i64 noundef %asmresult, i64 noundef %asmresult1, i64 noundef %asmresult2, i64 noundef %asmresult3, i64 noundef %asmresult4, i64 noundef %asmresult5, i64 noundef %asmresult6, i64 noundef %asmresult7, i64 noundef %asmresult8, i64 noundef %asmresult9, i64 noundef %asmresult10, i64 noundef %asmresult11, i64 noundef %asmresult12, i64 noundef %asmresult13, i64 noundef %asmresult14)
- %7 = load i32, ptr %foo, align 4
- ret i32 %7
-}
-
-define dso_local i32 @test7(ptr noundef captures(none) initializes((0, 4)) %foo) local_unnamed_addr {
-; O2-LABEL: test7:
-; O2: movl 4(%rdi), %eax
-; O2: #APP
-; O2: # rm output, r input: no pressure
-; O2: # %eax %eax
-; O2: #NO_APP
-; O2: movl %eax, (%rdi)
-;
-; O0-LABEL: test7:
-; O0: movl 4(%rdi), %eax
-; O0: #APP
-; O0: # rm output, r input: no pressure
-; O0: # -{{[0-9]+}}(%rsp) %eax
-; O0: #NO_APP
-; O0: movl -{{[0-9]+}}(%rsp), %eax
-; O0: movl %eax, (%rdi)
-entry:
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %0 = load i32, ptr %b, align 4
- %1 = tail call i32 asm sideeffect "# rm output, r input: no pressure\0A\09# $0 $1", "=rm,r,~{dirflag},~{fpsr},~{flags}"(i32 %0)
- store i32 %1, ptr %foo, align 4
- ret i32 %1
-}
-
-define dso_local i32 @test8(ptr noundef captures(none) initializes((0, 4)) %foo) local_unnamed_addr {
-; O2-LABEL: test8:
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: #APP
-; O2: movq $0, %rax
-; O2: movq $1, %rcx
-; O2: movq $2, %rdx
-; O2: movq $3, %rsi
-; O2: movq $4, %rdi
-; O2: movq $5, %rbx
-; O2: movq $6, %rbp
-; O2: movq $7, %r8
-; O2: movq $8, %r9
-; O2: movq $9, %r10
-; O2: movq $10, %r11
-; O2: movq $11, %r12
-; O2: movq $12, %r13
-; O2: movq $13, %r14
-; O2: movq $14, %r15
-; O2: #NO_APP
-; O2: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; O2: movl 4(%rbp), %esi
-; O2: #APP
-; O2: # rm output, r input: pressure
-; O2: # %esi %esi
-; O2: #NO_APP
-; O2: movl %esi, (%rbp)
-; O2: movq %rax, %rdi
-; O2: movq %rcx, %rsi
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O2: movq %rbx, %r9
-; O2: callq g@PLT
-; O2: movl (%rbp), %eax
-;
-; O0-LABEL: test8:
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: #APP
-; O0: movq $0, %rax
-; O0: movq $1, %rcx
-; O0: movq $2, %rdx
-; O0: movq $3, %rsi
-; O0: movq $4, %rdi
-; O0: movq $5, %rbx
-; O0: movq $6, %rbp
-; O0: movq $7, %r8
-; O0: movq $8, %r9
-; O0: movq $9, %r10
-; O0: movq $10, %r11
-; O0: movq $11, %r12
-; O0: movq $12, %r13
-; O0: movq $13, %r14
-; O0: movq $14, %r15
-; O0: #NO_APP
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rsi, %rcx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %r8, %rbx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O0: movq %r9, %rdi
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movl 4(%rdi), %eax
-; O0: #APP
-; O0: # rm output, r input: pressure
-; O0: # {{[0-9]+}}(%rsp) %eax
-; O0: #NO_APP
-; O0: movl {{[0-9]+}}(%rsp), %eax
-; O0: movl %eax, (%rdi)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rbp, (%rsp)
-; O0: movq %rbx, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %r10, {{[0-9]+}}(%rsp)
-; O0: movq %r11, {{[0-9]+}}(%rsp)
-; O0: movq %r12, {{[0-9]+}}(%rsp)
-; O0: movq %r13, {{[0-9]+}}(%rsp)
-; O0: movq %r14, {{[0-9]+}}(%rsp)
-; O0: movq %r15, {{[0-9]+}}(%rsp)
-; O0: callq g@PLT
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl (%rdi), %eax
-entry:
- %0 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "mov $$0, $0\0A\09mov $$1, $1\0A\09mov $$2, $2\0A\09mov $$3, $3\0A\09mov $$4, $4\0A\09mov $$5, $5\0A\09mov $$6, $6\0A\09mov $$7, $7\0A\09mov $$8, $8\0A\09mov $$9, $9\0A\09mov $$10, $10\0A\09mov $$11, $11\0A\09mov $$12, $12\0A\09mov $$13, $13\0A\09mov $$14, $14", "={rax},={rcx},={rdx},={rsi},={rdi},={rbx},={rbp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
- %asmresult = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 0
- %asmresult1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 1
- %asmresult2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 2
- %asmresult3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 3
- %asmresult4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 4
- %asmresult5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 5
- %asmresult6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 6
- %asmresult7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 7
- %asmresult8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 8
- %asmresult9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 9
- %asmresult10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 10
- %asmresult11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 11
- %asmresult12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 12
- %asmresult13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 13
- %asmresult14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 14
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %1 = load i32, ptr %b, align 4
- %2 = tail call i32 asm sideeffect "# rm output, r input: pressure\0A\09# $0 $1", "=rm,r,~{dirflag},~{fpsr},~{flags}"(i32 %1)
- store i32 %2, ptr %foo, align 4
- tail call void @g(i64 noundef %asmresult, i64 noundef %asmresult1, i64 noundef %asmresult2, i64 noundef %asmresult3, i64 noundef %asmresult4, i64 noundef %asmresult5, i64 noundef %asmresult6, i64 noundef %asmresult7, i64 noundef %asmresult8, i64 noundef %asmresult9, i64 noundef %asmresult10, i64 noundef %asmresult11, i64 noundef %asmresult12, i64 noundef %asmresult13, i64 noundef %asmresult14)
- %3 = load i32, ptr %foo, align 4
- ret i32 %3
-}
-
-define dso_local i32 @test9(ptr noundef %foo) local_unnamed_addr {
-; O2-LABEL: test9:
-; O2: movl 4(%rdi), %eax
-; O2: #APP
-; O2: # m output, rm input: no pressure
-; O2: # (%rdi) %eax
-; O2: #NO_APP
-; O2: movl (%rdi), %eax
-;
-; O0-LABEL: test9:
-; O0: movl 4(%rdi), %eax
-; O0: movl %eax, -{{[0-9]+}}(%rsp)
-; O0: movq %rax, -{{[0-9]+}}(%rsp)
-; O0: #APP
-; O0: # m output, rm input: no pressure
-; O0: # (%rdi) -{{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movl (%rdi), %eax
-entry:
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %0 = load i32, ptr %b, align 4
- tail call void asm sideeffect "# m output, rm input: no pressure\0A\09# $0 $1", "=*m,rm,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i32) %foo, i32 %0)
- %1 = load i32, ptr %foo, align 4
- ret i32 %1
-}
-
-define dso_local i32 @test10(ptr noundef %foo) local_unnamed_addr {
-; O2-LABEL: test10:
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: #APP
-; O2: movq $0, %rax
-; O2: movq $1, %rcx
-; O2: movq $2, %rdx
-; O2: movq $3, %rsi
-; O2: movq $4, %rdi
-; O2: movq $5, %rbx
-; O2: movq $6, %rbp
-; O2: movq $7, %r8
-; O2: movq $8, %r9
-; O2: movq $9, %r10
-; O2: movq $10, %r11
-; O2: movq $11, %r12
-; O2: movq $12, %r13
-; O2: movq $13, %r14
-; O2: movq $14, %r15
-; O2: #NO_APP
-; O2: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; O2: movl 4(%rbp), %esi
-; O2: #APP
-; O2: # m output, rm input: pressure
-; O2: # (%rbp) %esi
-; O2: #NO_APP
-; O2: movq %rax, %rdi
-; O2: movq %rcx, %rsi
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O2: movq %rbx, %r9
-; O2: callq g@PLT
-; O2: movl (%rbp), %eax
-;
-; O0-LABEL: test10:
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: #APP
-; O0: movq $0, %rax
-; O0: movq $1, %rcx
-; O0: movq $2, %rdx
-; O0: movq $3, %rsi
-; O0: movq $4, %rdi
-; O0: movq $5, %rbx
-; O0: movq $6, %rbp
-; O0: movq $7, %r8
-; O0: movq $8, %r9
-; O0: movq $9, %r10
-; O0: movq $10, %r11
-; O0: movq $11, %r12
-; O0: movq $12, %r13
-; O0: movq $13, %r14
-; O0: movq $14, %r15
-; O0: #NO_APP
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rsi, %rcx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; O0: movq %rdi, %rax
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %r8, %rbx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O0: movq %r9, %rax
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl 4(%rdi), %edi
-; O0: movl %edi, {{[0-9]+}}(%rsp)
-; O0: movq %rdi, {{[0-9]+}}(%rsp)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: #APP
-; O0: # m output, rm input: pressure
-; O0: # (%rdi) {{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rbp, (%rsp)
-; O0: movq %rbx, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %r10, {{[0-9]+}}(%rsp)
-; O0: movq %r11, {{[0-9]+}}(%rsp)
-; O0: movq %r12, {{[0-9]+}}(%rsp)
-; O0: movq %r13, {{[0-9]+}}(%rsp)
-; O0: movq %r14, {{[0-9]+}}(%rsp)
-; O0: movq %r15, {{[0-9]+}}(%rsp)
-; O0: callq g@PLT
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl (%rdi), %eax
-entry:
- %0 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "mov $$0, $0\0A\09mov $$1, $1\0A\09mov $$2, $2\0A\09mov $$3, $3\0A\09mov $$4, $4\0A\09mov $$5, $5\0A\09mov $$6, $6\0A\09mov $$7, $7\0A\09mov $$8, $8\0A\09mov $$9, $9\0A\09mov $$10, $10\0A\09mov $$11, $11\0A\09mov $$12, $12\0A\09mov $$13, $13\0A\09mov $$14, $14", "={rax},={rcx},={rdx},={rsi},={rdi},={rbx},={rbp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
- %asmresult = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 0
- %asmresult1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 1
- %asmresult2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 2
- %asmresult3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 3
- %asmresult4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 4
- %asmresult5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 5
- %asmresult6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 6
- %asmresult7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 7
- %asmresult8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 8
- %asmresult9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 9
- %asmresult10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 10
- %asmresult11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 11
- %asmresult12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 12
- %asmresult13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 13
- %asmresult14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 14
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %1 = load i32, ptr %b, align 4
- tail call void asm sideeffect "# m output, rm input: pressure\0A\09# $0 $1", "=*m,rm,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i32) %foo, i32 %1)
- tail call void @g(i64 noundef %asmresult, i64 noundef %asmresult1, i64 noundef %asmresult2, i64 noundef %asmresult3, i64 noundef %asmresult4, i64 noundef %asmresult5, i64 noundef %asmresult6, i64 noundef %asmresult7, i64 noundef %asmresult8, i64 noundef %asmresult9, i64 noundef %asmresult10, i64 noundef %asmresult11, i64 noundef %asmresult12, i64 noundef %asmresult13, i64 noundef %asmresult14)
- %2 = load i32, ptr %foo, align 4
- ret i32 %2
-}
-
-define dso_local i32 @test11(ptr noundef %foo) local_unnamed_addr {
-; O2-LABEL: test11:
-; O2: movl (%rdi), %eax
-; O2: movl 4(%rdi), %ecx
-; O2: #APP
-; O2: # multiple m output, rm input: no pressure
-; O2: # (%rdi) 4(%rdi) 8(%rdi) 12(%rdi) 16(%rdi) %eax %ecx
-; O2: #NO_APP
-; O2: movl (%rdi), %eax
-;
-; O0-LABEL: test11:
-; O0: movq %rdi, %rax
-; O0: movq %rdi, %rcx
-; O0: movq %rdi, %rdx
-; O0: movq %rdi, %rsi
-; O0: movl (%rdi), %r9d
-; O0: movl 4(%rdi), %r8d
-; O0: movl %r9d, -{{[0-9]+}}(%rsp)
-; O0: movl %r8d, -{{[0-9]+}}(%rsp)
-; O0: movq %r8, -{{[0-9]+}}(%rsp)
-; O0: movq %r8, -{{[0-9]+}}(%rsp)
-; O0: #APP
-; O0: # multiple m output, rm input: no pressure
-; O0: # (%rdi) (%rax) (%rcx) (%rdx) (%rsi) -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movl (%rdi), %eax
-entry:
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %0 = load i32, ptr %foo, align 4
- %1 = load i32, ptr %b, align 4
- tail call void asm sideeffect "# multiple m output, rm input: no pressure\0A\09# $0 $1 $2 $3 $4 $5 $6", "=*m,=*m,=*m,=*m,=*m,rm,rm,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %foo, ptr nonnull elementtype(i32) %b, ptr nonnull elementtype(i32) %c, ptr nonnull elementtype(i32) %d, ptr nonnull elementtype(i32) %e, i32 %0, i32 %1)
- %2 = load i32, ptr %foo, align 4
- ret i32 %2
-}
-
-define dso_local i32 @test12(ptr noundef %foo) local_unnamed_addr {
-; O2-LABEL: test12:
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: #APP
-; O2: movq $0, %rax
-; O2: movq $1, %rcx
-; O2: movq $2, %rdx
-; O2: movq $3, %rsi
-; O2: movq $4, %rdi
-; O2: movq $5, %rbx
-; O2: movq $6, %rbp
-; O2: movq $7, %r8
-; O2: movq $8, %r9
-; O2: movq $9, %r10
-; O2: movq $10, %r11
-; O2: movq $11, %r12
-; O2: movq $12, %r13
-; O2: movq $13, %r14
-; O2: movq $14, %r15
-; O2: #NO_APP
-; O2: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; O2: movl (%rbp), %esi
-; O2: movl 4(%rbp), %edi
-; O2: #APP
-; O2: # multiple m output, rm input: pressure
-; O2: # (%rbp) 4(%rbp) 8(%rbp) 12(%rbp) 16(%rbp) %esi %edi
-; O2: #NO_APP
-; O2: movq %rax, %rdi
-; O2: movq %rcx, %rsi
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O2: movq %rbx, %r9
-; O2: callq g@PLT
-; O2: movl (%rbp), %eax
-;
-; O0-LABEL: test12:
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: #APP
-; O0: movq $0, %rax
-; O0: movq $1, %rcx
-; O0: movq $2, %rdx
-; O0: movq $3, %rsi
-; O0: movq $4, %rdi
-; O0: movq $5, %rbx
-; O0: movq $6, %rbp
-; O0: movq $7, %r8
-; O0: movq $8, %r9
-; O0: movq $9, %r10
-; O0: movq $10, %r11
-; O0: movq $11, %r12
-; O0: movq $12, %r13
-; O0: movq $13, %r14
-; O0: movq $14, %r15
-; O0: #NO_APP
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %r8, %rbx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O0: movq %r9, %rdi
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rdi, %rax
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rdi, %rcx
-; O0: movq %rdi, %rdx
-; O0: movq %rdi, %rsi
-; O0: movl (%rdi), %eax
-; O0: movl 4(%rdi), %edi
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movl %edi, {{[0-9]+}}(%rsp)
-; O0: movq %rdi, {{[0-9]+}}(%rsp)
-; O0: movq %rdi, {{[0-9]+}}(%rsp)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: #APP
-; O0: # multiple m output, rm input: pressure
-; O0: # (%rdi) (%rax) (%rcx) (%rdx) (%rsi) {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rbp, (%rsp)
-; O0: movq %rbx, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %r10, {{[0-9]+}}(%rsp)
-; O0: movq %r11, {{[0-9]+}}(%rsp)
-; O0: movq %r12, {{[0-9]+}}(%rsp)
-; O0: movq %r13, {{[0-9]+}}(%rsp)
-; O0: movq %r14, {{[0-9]+}}(%rsp)
-; O0: movq %r15, {{[0-9]+}}(%rsp)
-; O0: callq g@PLT
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl (%rdi), %eax
-entry:
- %0 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "mov $$0, $0\0A\09mov $$1, $1\0A\09mov $$2, $2\0A\09mov $$3, $3\0A\09mov $$4, $4\0A\09mov $$5, $5\0A\09mov $$6, $6\0A\09mov $$7, $7\0A\09mov $$8, $8\0A\09mov $$9, $9\0A\09mov $$10, $10\0A\09mov $$11, $11\0A\09mov $$12, $12\0A\09mov $$13, $13\0A\09mov $$14, $14", "={rax},={rcx},={rdx},={rsi},={rdi},={rbx},={rbp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
- %asmresult = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 0
- %asmresult1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 1
- %asmresult2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 2
- %asmresult3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 3
- %asmresult4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 4
- %asmresult5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 5
- %asmresult6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 6
- %asmresult7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 7
- %asmresult8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 8
- %asmresult9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 9
- %asmresult10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 10
- %asmresult11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 11
- %asmresult12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 12
- %asmresult13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 13
- %asmresult14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 14
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %1 = load i32, ptr %foo, align 4
- %2 = load i32, ptr %b, align 4
- tail call void asm sideeffect "# multiple m output, rm input: pressure\0A\09# $0 $1 $2 $3 $4 $5 $6", "=*m,=*m,=*m,=*m,=*m,rm,rm,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %foo, ptr nonnull elementtype(i32) %b, ptr nonnull elementtype(i32) %c, ptr nonnull elementtype(i32) %d, ptr nonnull elementtype(i32) %e, i32 %1, i32 %2)
- tail call void @g(i64 noundef %asmresult, i64 noundef %asmresult1, i64 noundef %asmresult2, i64 noundef %asmresult3, i64 noundef %asmresult4, i64 noundef %asmresult5, i64 noundef %asmresult6, i64 noundef %asmresult7, i64 noundef %asmresult8, i64 noundef %asmresult9, i64 noundef %asmresult10, i64 noundef %asmresult11, i64 noundef %asmresult12, i64 noundef %asmresult13, i64 noundef %asmresult14)
- %3 = load i32, ptr %foo, align 4
- ret i32 %3
-}
-
-define dso_local i32 @test13(ptr noundef %foo) local_unnamed_addr {
-; O2-LABEL: test13:
-; O2: movl (%rdi), %ecx
-; O2: movl 4(%rdi), %edx
-; O2: #APP
-; O2: # multiple m output, rm input: no pressure
-; O2: # %eax %esi %r8d %r9d %r10d %ecx %edx
-; O2: #NO_APP
-; O2: movl %eax, (%rdi)
-; O2: movl %esi, 4(%rdi)
-; O2: movl %r8d, 8(%rdi)
-; O2: movl %r9d, 12(%rdi)
-; O2: movl %r10d, 16(%rdi)
-;
-; O0-LABEL: test13:
-; O0: movq %rdi, %rax
-; O0: movq %rdi, %rcx
-; O0: movq %rdi, %rdx
-; O0: movq %rdi, %rsi
-; O0: movl (%rdi), %r9d
-; O0: movl 4(%rdi), %r8d
-; O0: movl %r9d, -{{[0-9]+}}(%rsp)
-; O0: movl %r8d, -{{[0-9]+}}(%rsp)
-; O0: movq %r8, -{{[0-9]+}}(%rsp)
-; O0: movq %r8, -{{[0-9]+}}(%rsp)
-; O0: #APP
-; O0: # multiple m output, rm input: no pressure
-; O0: # (%rdi) (%rax) (%rcx) (%rdx) (%rsi) -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movl (%rdi), %eax
-entry:
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %0 = load i32, ptr %foo, align 4
- %1 = load i32, ptr %b, align 4
- tail call void asm sideeffect "# multiple m output, rm input: no pressure\0A\09# $0 $1 $2 $3 $4 $5 $6", "=*&rm,=*&rm,=*&rm,=*&rm,=*&rm,rm,rm,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %foo, ptr nonnull elementtype(i32) %b, ptr nonnull elementtype(i32) %c, ptr nonnull elementtype(i32) %d, ptr nonnull elementtype(i32) %e, i32 %0, i32 %1)
- %2 = load i32, ptr %foo, align 4
- ret i32 %2
-}
-
-define dso_local i32 @test14(ptr noundef %foo) local_unnamed_addr {
-; O2-LABEL: test14:
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: #APP
-; O2: movq $0, %rax
-; O2: movq $1, %rcx
-; O2: movq $2, %rdx
-; O2: movq $3, %rsi
-; O2: movq $4, %rdi
-; O2: movq $5, %rbx
-; O2: movq $6, %rbp
-; O2: movq $7, %r8
-; O2: movq $8, %r9
-; O2: movq $9, %r10
-; O2: movq $10, %r11
-; O2: movq $11, %r12
-; O2: movq $12, %r13
-; O2: movq $13, %r14
-; O2: movq $14, %r15
-; O2: #NO_APP
-; O2: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; O2: movl (%rbp), %esi
-; O2: movl 4(%rbp), %edi
-; O2: #APP
-; O2: # multiple m output, rm input: pressure
-; O2: # (%rbp) 4(%rbp) 8(%rbp) 12(%rbp) 16(%rbp) %esi %edi
-; O2: #NO_APP
-; O2: movq %rax, %rdi
-; O2: movq %rcx, %rsi
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O2: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O2: movq %rbx, %r9
-; O2: callq g@PLT
-; O2: movl (%rbp), %eax
-;
-; O0-LABEL: test14:
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: #APP
-; O0: movq $0, %rax
-; O0: movq $1, %rcx
-; O0: movq $2, %rdx
-; O0: movq $3, %rsi
-; O0: movq $4, %rdi
-; O0: movq $5, %rbx
-; O0: movq $6, %rbp
-; O0: movq $7, %r8
-; O0: movq $8, %r9
-; O0: movq $9, %r10
-; O0: movq $10, %r11
-; O0: movq $11, %r12
-; O0: movq $12, %r13
-; O0: movq $13, %r14
-; O0: movq $14, %r15
-; O0: #NO_APP
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %r8, %rbx
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; O0: movq %r9, %rdi
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; O0: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rdi, %rax
-; O0: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; O0: movq %rdi, %rcx
-; O0: movq %rdi, %rdx
-; O0: movq %rdi, %rsi
-; O0: movl (%rdi), %eax
-; O0: movl 4(%rdi), %edi
-; O0: movl %eax, {{[0-9]+}}(%rsp)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movl %edi, {{[0-9]+}}(%rsp)
-; O0: movq %rdi, {{[0-9]+}}(%rsp)
-; O0: movq %rdi, {{[0-9]+}}(%rsp)
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: #APP
-; O0: # multiple m output, rm input: pressure
-; O0: # (%rdi) (%rax) (%rcx) (%rdx) (%rsi) {{[0-9]+}}(%rsp) {{[0-9]+}}(%rsp)
-; O0: #NO_APP
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movq %rbp, (%rsp)
-; O0: movq %rbx, {{[0-9]+}}(%rsp)
-; O0: movq %rax, {{[0-9]+}}(%rsp)
-; O0: movq %r10, {{[0-9]+}}(%rsp)
-; O0: movq %r11, {{[0-9]+}}(%rsp)
-; O0: movq %r12, {{[0-9]+}}(%rsp)
-; O0: movq %r13, {{[0-9]+}}(%rsp)
-; O0: movq %r14, {{[0-9]+}}(%rsp)
-; O0: movq %r15, {{[0-9]+}}(%rsp)
-; O0: callq g@PLT
-; O0: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; O0: movl (%rdi), %eax
-entry:
- %0 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "mov $$0, $0\0A\09mov $$1, $1\0A\09mov $$2, $2\0A\09mov $$3, $3\0A\09mov $$4, $4\0A\09mov $$5, $5\0A\09mov $$6, $6\0A\09mov $$7, $7\0A\09mov $$8, $8\0A\09mov $$9, $9\0A\09mov $$10, $10\0A\09mov $$11, $11\0A\09mov $$12, $12\0A\09mov $$13, $13\0A\09mov $$14, $14", "={rax},={rcx},={rdx},={rsi},={rdi},={rbx},={rbp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
- %asmresult = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 0
- %asmresult1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 1
- %asmresult2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 2
- %asmresult3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 3
- %asmresult4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 4
- %asmresult5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 5
- %asmresult6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 6
- %asmresult7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 7
- %asmresult8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 8
- %asmresult9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 9
- %asmresult10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 10
- %asmresult11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 11
- %asmresult12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 12
- %asmresult13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 13
- %asmresult14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %0, 14
- %b = getelementptr inbounds nuw i8, ptr %foo, i64 4
- %c = getelementptr inbounds nuw i8, ptr %foo, i64 8
- %d = getelementptr inbounds nuw i8, ptr %foo, i64 12
- %e = getelementptr inbounds nuw i8, ptr %foo, i64 16
- %1 = load i32, ptr %foo, align 4
- %2 = load i32, ptr %b, align 4
- tail call void asm sideeffect "# multiple m output, rm input: pressure\0A\09# $0 $1 $2 $3 $4 $5 $6", "=*m,=*m,=*m,=*m,=*m,rm,rm,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %foo, ptr nonnull elementtype(i32) %b, ptr nonnull elementtype(i32) %c, ptr nonnull elementtype(i32) %d, ptr nonnull elementtype(i32) %e, i32 %1, i32 %2)
- tail call void @g(i64 noundef %asmresult, i64 noundef %asmresult1, i64 noundef %asmresult2, i64 noundef %asmresult3, i64 noundef %asmresult4, i64 noundef %asmresult5, i64 noundef %asmresult6, i64 noundef %asmresult7, i64 noundef %asmresult8, i64 noundef %asmresult9, i64 noundef %asmresult10, i64 noundef %asmresult11, i64 noundef %asmresult12, i64 noundef %asmresult13, i64 noundef %asmresult14)
- %3 = load i32, ptr %foo, align 4
- ret i32 %3
-}
-
-declare void @g(i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef, i64 noundef)
diff --git a/llvm/test/CodeGen/X86/inline-asm-prepare-memory.ll b/llvm/test/CodeGen/X86/inline-asm-prepare-memory.ll
deleted file mode 100644
index 3cd664ab08754..0000000000000
--- a/llvm/test/CodeGen/X86/inline-asm-prepare-memory.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt -S -mtriple=x86_64-unknown-linux-gnu -inline-asm-prepare < %s | FileCheck %s
-
-define void @test1(i32 %x) {
-; CHECK-LABEL: define void @test1(
-; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ASM_MEM:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 [[X]], ptr [[ASM_MEM]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 asm sideeffect "mov $1, $0", "=r,rm,~{dirflag},~{fpsr},~{flags}"(ptr [[ASM_MEM]])
-; CHECK-NEXT: ret void
-;
-entry:
- %0 = call i32 asm sideeffect "mov $1, $0", "=r,rm,~{dirflag},~{fpsr},~{flags}"(i32 %x)
- ret void
-}
-
-define void @test2(ptr %p) {
-; CHECK-LABEL: define void @test2(
-; CHECK-SAME: ptr [[P:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ASM_MEM:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void asm sideeffect "mov $1, $0", "=*rm,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i32) [[ASM_MEM]])
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ASM_MEM]], align 4
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[P]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %0 = call i32 asm sideeffect "mov $1, $0", "=rm,~{dirflag},~{fpsr},~{flags}"()
- store i32 %0, ptr %p
- ret void
-}
-
-define void @test3(ptr %x_ptr) {
-; CHECK-LABEL: define void @test3(
-; CHECK-SAME: ptr [[X_PTR:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ASM_MEM:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_PTR]], align 4
-; CHECK-NEXT: store i32 [[X]], ptr [[ASM_MEM]], align 4
-; CHECK-NEXT: call void asm sideeffect "inc $0", "=*rm,0,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i32) [[ASM_MEM]], ptr [[ASM_MEM]])
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ASM_MEM]], align 4
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[X_PTR]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %x = load i32, ptr %x_ptr
- %0 = call i32 asm sideeffect "inc $0", "=rm,0,~{dirflag},~{fpsr},~{flags}"(i32 %x)
- store i32 %0, ptr %x_ptr
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/inlineasm-sched-bug.ll b/llvm/test/CodeGen/X86/inlineasm-sched-bug.ll
index a322bd3003a58..be4d1c29332f7 100644
--- a/llvm/test/CodeGen/X86/inlineasm-sched-bug.ll
+++ b/llvm/test/CodeGen/X86/inlineasm-sched-bug.ll
@@ -6,13 +6,16 @@
define i32 @foo(i32 %treemap) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: negl %ecx
; CHECK-NEXT: andl %eax, %ecx
+; CHECK-NEXT: movl %ecx, (%esp)
; CHECK-NEXT: #APP
-; CHECK-NEXT: bsfl %ecx, %eax
+; CHECK-NEXT: bsfl (%esp), %eax
; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popl %ecx
; CHECK-NEXT: retl
entry:
%sub = sub i32 0, %treemap
diff --git a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
index 49df99b25e0b9..ec27b156ece41 100644
--- a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
@@ -29,7 +29,7 @@
; O0-NEXT: expand-reductions
; O0-NEXT: indirectbr-expand
; O0-NEXT: dwarf-eh-prepare
-; O0-NEXT: inline-asm-prepare
+; O0-NEXT: callbr-prepare
; O0-NEXT: safe-stack
; O0-NEXT: stack-protector
; O0-NEXT: verify)
@@ -101,7 +101,7 @@
; O2-NEXT: codegenprepare
; O2-NEXT: dwarf-eh-prepare
; O2-NEXT: objc-arc-contract
-; O2-NEXT: inline-asm-prepare
+; O2-NEXT: callbr-prepare
; O2-NEXT: safe-stack
; O2-NEXT: stack-protector
; O2-NEXT: verify)
@@ -208,7 +208,7 @@
; O0-WINDOWS-NEXT: cfguard
; O0-WINDOWS-NEXT: win-eh-prepare
; O0-WINDOWS-NEXT: dwarf-eh-prepare
-; O0-WINDOWS-NEXT: inline-asm-prepare
+; O0-WINDOWS-NEXT: callbr-prepare
; O0-WINDOWS-NEXT: safe-stack
; O0-WINDOWS-NEXT: stack-protector
; O0-WINDOWS-NEXT: verify)
@@ -284,7 +284,7 @@
; O3-WINDOWS-NEXT: win-eh-prepare
; O3-WINDOWS-NEXT: dwarf-eh-prepare
; O3-WINDOWS-NEXT: objc-arc-contract
-; O3-WINDOWS-NEXT: inline-asm-prepare
+; O3-WINDOWS-NEXT: callbr-prepare
; O3-WINDOWS-NEXT: safe-stack
; O3-WINDOWS-NEXT: stack-protector
; O3-WINDOWS-NEXT: verify)
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 55d386d0f0952..617cfd4c01145 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -76,7 +76,7 @@
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: ObjC ARC contraction
-; CHECK-NEXT: Prepare inline asm insts
+; CHECK-NEXT: Prepare callbr
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp
index adfbf2e368e58..b0c2733758948 100644
--- a/llvm/tools/opt/optdriver.cpp
+++ b/llvm/tools/opt/optdriver.cpp
@@ -378,7 +378,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"structurizecfg",
"fix-irreducible",
"expand-ir-insts",
- "inline-asm-prepare",
+ "callbrprepare",
"scalarizer",
};
for (StringLiteral P : PassNamePrefix)
@@ -432,7 +432,7 @@ optMain(int argc, char **argv,
initializeExpandMemCmpLegacyPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSelectOptimizePass(Registry);
- initializeInlineAsmPreparePass(Registry);
+ initializeCallBrPreparePass(Registry);
initializeCodeGenPrepareLegacyPassPass(Registry);
initializeAtomicExpandLegacyPass(Registry);
initializeWinEHPreparePass(Registry);
diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
index 57371a8e08c3c..58b78d39533ca 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -39,6 +39,7 @@ static_library("CodeGen") {
"CFIFixup.cpp",
"CFIInstrInserter.cpp",
"CalcSpillWeights.cpp",
+ "CallBrPrepare.cpp",
"CallingConvLower.cpp",
"CodeGen.cpp",
"CodeGenCommonISel.cpp",
@@ -77,7 +78,6 @@ static_library("CodeGen") {
"ImplicitNullChecks.cpp",
"IndirectBrExpandPass.cpp",
"InitUndef.cpp",
- "InlineAsmPrepare.cpp",
"InlineSpiller.cpp",
"InsertCodePrefetch.cpp",
"InterferenceCache.cpp",