[llvm] [ExtendLifetimes] Implement llvm.fake.use to extend variable lifetimes (PR #86149)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 09:52:30 PDT 2024


https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/86149

From 8b6b21ca023f3f89d4dab44b225fc18c276cf07d Mon Sep 17 00:00:00 2001
From: Wolfgang Pieb <wolfgang.pieb at sony.com>
Date: Thu, 21 Mar 2024 16:21:52 +0000
Subject: [PATCH] [ExtendLifetimes] Implement llvm.fake.use to extend variable
 lifetimes (#86149)

This patch is part of a set of patches that add an `-fextend-lifetimes`
flag to clang, which extends the lifetimes of local variables and
parameters for improved debuggability. In addition to that flag, the
patch series adds a pragma to selectively disable `-fextend-lifetimes`,
and an `-fextend-this-ptr` flag which functions as `-fextend-lifetimes`
for `this` pointers only. All changes and tests in these patches were
written by Wolfgang Pieb (@wolfy1961), while Stephen Tozer (@SLTozer)
has handled review and merging. The extend-lifetimes flag is intended to
eventually be enabled by `-Og`, as discussed in the RFC here:

https://discourse.llvm.org/t/rfc-redefine-og-o1-and-add-a-new-level-of-og/72850

This patch implements a new intrinsic instruction in LLVM,
`llvm.fake.use` in IR and `FAKE_USE` in MIR, that takes a single operand
and has no effect other than "using" its operand, to ensure that its
operand remains live until after the fake use. This patch does not emit
fake uses anywhere; the next patch in this sequence causes them to be
emitted from the clang frontend, such that for each variable (or `this`) a
fake.use of that variable's value is inserted at the end of the variable's
scope. This patch covers everything post-frontend, which
is largely just the basic plumbing for a new intrinsic/instruction,
along with a few steps to preserve the fake uses through optimizations
(such as moving them ahead of a tail call or translating them through
SROA).

Co-authored-by: Stephen Tozer <stephen.tozer at sony.com>
---
 llvm/docs/LangRef.rst                         |  36 ++++
 llvm/include/llvm/Analysis/PtrUseVisitor.h    |   6 +
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |   5 +
 llvm/include/llvm/CodeGen/MachineInstr.h      |   2 +
 llvm/include/llvm/CodeGen/Passes.h            |   3 +
 llvm/include/llvm/CodeGen/SelectionDAGISel.h  |   1 +
 llvm/include/llvm/IR/Intrinsics.td            |   3 +
 llvm/include/llvm/InitializePasses.h          |   1 +
 .../llvm/Passes/MachinePassRegistry.def       |   1 +
 llvm/include/llvm/Support/TargetOpcodes.def   |   3 +
 llvm/include/llvm/Target/Target.td            |  10 ++
 llvm/lib/CodeGen/Analysis.cpp                 |   3 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  19 ++
 llvm/lib/CodeGen/CMakeLists.txt               |   1 +
 llvm/lib/CodeGen/CodeGen.cpp                  |   1 +
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  44 ++++-
 .../CodeGen/DeadMachineInstructionElim.cpp    |   3 +-
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |   8 +
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |   3 +
 llvm/lib/CodeGen/MachineCSE.cpp               |   3 +-
 llvm/lib/CodeGen/MachineScheduler.cpp         |   3 +-
 llvm/lib/CodeGen/MachineSink.cpp              |   2 +-
 llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp  | 162 ++++++++++++++++++
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp    |   3 +
 .../SelectionDAG/LegalizeFloatTypes.cpp       |  20 +++
 .../SelectionDAG/LegalizeIntegerTypes.cpp     |  19 ++
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   7 +
 .../SelectionDAG/LegalizeTypesGeneric.cpp     |  11 ++
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  36 ++++
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  33 ++++
 .../SelectionDAG/SelectionDAGDumper.cpp       |   2 +
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp |  64 +++++++
 llvm/lib/CodeGen/TargetPassConfig.cpp         |   1 +
 llvm/lib/IR/Instruction.cpp                   |   5 +-
 llvm/lib/IR/Verifier.cpp                      |   1 +
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp  |   1 +
 llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp  |   1 +
 .../WebAssembly/WebAssemblyTargetMachine.cpp  |   1 +
 llvm/lib/Target/X86/X86FloatingPoint.cpp      |  32 ++++
 llvm/lib/Transforms/Scalar/SROA.cpp           |  30 ++++
 llvm/lib/Transforms/Utils/CloneFunction.cpp   |   6 +
 llvm/lib/Transforms/Utils/Local.cpp           |   3 +
 .../Utils/PromoteMemoryToRegister.cpp         |   3 +-
 .../ScalarEvolution/flags-from-poison-dbg.ll  |   2 +-
 llvm/test/CodeGen/AArch64/O0-pipeline.ll      |   1 +
 llvm/test/CodeGen/AArch64/O3-pipeline.ll      |   1 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline.ll      |   5 +
 llvm/test/CodeGen/ARM/O3-pipeline.ll          |   1 +
 llvm/test/CodeGen/LoongArch/O0-pipeline.ll    |   1 +
 llvm/test/CodeGen/LoongArch/opt-pipeline.ll   |   1 +
 .../CodeGen/MIR/X86/fake-use-tailcall.mir     |  99 +++++++++++
 llvm/test/CodeGen/PowerPC/O0-pipeline.ll      |   1 +
 llvm/test/CodeGen/PowerPC/O3-pipeline.ll      |   1 +
 llvm/test/CodeGen/RISCV/O0-pipeline.ll        |   1 +
 llvm/test/CodeGen/RISCV/O3-pipeline.ll        |   1 +
 llvm/test/CodeGen/X86/O0-pipeline.ll          |   1 +
 llvm/test/CodeGen/X86/fake-use-hpfloat.ll     |  15 ++
 llvm/test/CodeGen/X86/fake-use-ld.ll          |  43 +++++
 llvm/test/CodeGen/X86/fake-use-scheduler.mir  | 123 +++++++++++++
 .../CodeGen/X86/fake-use-simple-tail-call.ll  |  24 +++
 .../CodeGen/X86/fake-use-suppress-load.ll     |  14 ++
 llvm/test/CodeGen/X86/fake-use-tailcall.ll    |  37 ++++
 llvm/test/CodeGen/X86/fake-use-vector.ll      |  39 +++++
 llvm/test/CodeGen/X86/fake-use-vector2.ll     |  27 +++
 llvm/test/CodeGen/X86/fake-use-zero-length.ll |  30 ++++
 llvm/test/CodeGen/X86/opt-pipeline.ll         |   1 +
 .../DebugInfo/AArch64/fake-use-global-isel.ll |  98 +++++++++++
 llvm/test/DebugInfo/Inputs/check-fake-use.py  | 107 ++++++++++++
 llvm/test/DebugInfo/X86/fake-use.ll           |  96 +++++++++++
 .../GlobalISelCombinerEmitter/match-table.td  |  54 +++---
 .../CodeGenPrepare/X86/fake-use-phi.ll        |  50 ++++++
 .../CodeGenPrepare/X86/fake-use-split-ret.ll  |  37 ++++
 .../test/Transforms/GVN/fake-use-constprop.ll |  60 +++++++
 llvm/test/Transforms/SROA/fake-use-escape.ll  |  21 +++
 llvm/test/Transforms/SROA/fake-use-sroa.ll    |  52 ++++++
 .../gn/secondary/llvm/lib/CodeGen/BUILD.gn    |   1 +
 76 files changed, 1609 insertions(+), 38 deletions(-)
 create mode 100644 llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp
 create mode 100644 llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
 create mode 100644 llvm/test/CodeGen/X86/fake-use-hpfloat.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-ld.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-scheduler.mir
 create mode 100644 llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-suppress-load.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-tailcall.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-vector.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-vector2.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-zero-length.ll
 create mode 100644 llvm/test/DebugInfo/AArch64/fake-use-global-isel.ll
 create mode 100644 llvm/test/DebugInfo/Inputs/check-fake-use.py
 create mode 100644 llvm/test/DebugInfo/X86/fake-use.ll
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/fake-use-phi.ll
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/fake-use-split-ret.ll
 create mode 100644 llvm/test/Transforms/GVN/fake-use-constprop.ll
 create mode 100644 llvm/test/Transforms/SROA/fake-use-escape.ll
 create mode 100644 llvm/test/Transforms/SROA/fake-use-sroa.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8c696cb16e77f8..cf0a6f96fb012e 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -29477,6 +29477,42 @@ execution, but is unknown at compile time.
 If the result value does not fit in the result type, then the result is
 a :ref:`poison value <poisonvalues>`.
 
+.. _llvm_fake_use:
+
+'``llvm.fake.use``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.fake.use(...)
+
+Overview:
+"""""""""
+
+The ``llvm.fake.use`` intrinsic is a no-op. It takes a single
+value as an operand and is treated as a use of that operand, forcing the
+optimizer to preserve that value prior to the fake use. This is used to
+extend the lifetimes of variables: placing this intrinsic at the end of
+a variable's scope helps prevent that variable from being optimized out.
+
+Arguments:
+""""""""""
+
+The ``llvm.fake.use`` intrinsic takes one argument, which may be any
+function-local SSA value. Note that the signature is variadic so that the
+intrinsic can take any type of argument, but passing more than one argument will
+result in an error.
+
+Semantics:
+""""""""""
+
+This intrinsic does nothing, but optimizers must consider it a use of its single
+operand and should try to preserve the intrinsic and its position in the
+function.
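+
+For example (names are illustrative), the following keeps ``%x`` available
+until after the fake use at the end of its scope::
+
+      %x = add i32 %i, 1
+      ; ... no further real uses of %x ...
+      call void (...) @llvm.fake.use(i32 %x)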
+
 
 Stack Map Intrinsics
 --------------------
diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h
index b6cc14d2077af0..f5c23b1b4e014d 100644
--- a/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -278,6 +278,12 @@ class PtrUseVisitor : protected InstVisitor<DerivedT>,
     default:
       return Base::visitIntrinsicInst(II);
 
+    // We escape pointers used by a fake_use to prevent SROA from transforming
+    // them.
+    case Intrinsic::fake_use:
+      PI.setEscaped(&II);
+      return;
+
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
       return; // No-op intrinsics.
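
For illustration (a sketch; names hypothetical), a fake use of an alloca's
address counts as an escape, so SROA leaves the aggregate in memory instead of
splitting it into scalar SSA values:

  %s = alloca { i32, i32 }
  ; The pointer escapes into the fake use below, so SROA does not rewrite %s.
  call void (...) @llvm.fake.use(ptr %s)
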
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 86ff2628975942..187d624f0a73b9 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1372,6 +1372,11 @@ enum NodeType {
   LIFETIME_START,
   LIFETIME_END,
 
+  /// FAKE_USE represents a use of the operand but does not do anything.
+  /// Its purpose is to extend the operand's lifetime, mainly to aid
+  /// debugging.
+  FAKE_USE,
+
   /// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the
   /// beginning and end of GC transition  sequence, and carry arbitrary
   /// information that target might need for lowering.  The first operand is
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 04c8144f2fe7af..62667cc8ef3800 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1435,6 +1435,8 @@ class MachineInstr
     return getOpcode() == TargetOpcode::EXTRACT_SUBREG;
   }
 
+  bool isFakeUse() const { return getOpcode() == TargetOpcode::FAKE_USE; }
+
   /// Return true if the instruction behaves like a copy.
   /// This does not include native copy instructions.
   bool isCopyLike() const {
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index c7c2178571215b..dbdd110b0600e5 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -440,6 +440,9 @@ namespace llvm {
   // metadata after llvm SanitizerBinaryMetadata pass.
   extern char &MachineSanitizerBinaryMetadataID;
 
+  /// RemoveLoadsIntoFakeUses pass.
+  extern char &RemoveLoadsIntoFakeUsesID;
+
   /// RemoveRedundantDebugValues pass.
   extern char &RemoveRedundantDebugValuesID;
 
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index fc0590b1a1b69e..f6191c6fdb7fe6 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -463,6 +463,7 @@ class SelectionDAGISel {
   void Select_READ_REGISTER(SDNode *Op);
   void Select_WRITE_REGISTER(SDNode *Op);
   void Select_UNDEF(SDNode *N);
+  void Select_FAKE_USE(SDNode *N);
   void CannotYetSelect(SDNode *N);
 
   void Select_FREEZE(SDNode *N);
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e3bf0446575ae5..232d6be1073f49 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1835,6 +1835,9 @@ def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
                                 [IntrNoMem, IntrWillReturn, IntrConvergent],
                                 "llvm.is.constant">;
 
+// Introduce a use of the argument without generating any code.
+def int_fake_use : Intrinsic<[], [llvm_vararg_ty]>;
+
 // Intrinsic to mask out bits of a pointer.
 // First argument must be pointer or vector of pointer. This is checked by the
 // verifier.
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index cc5e93c58f564a..47a1ca15fc0d1f 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -264,6 +264,7 @@ void initializeRegionOnlyViewerPass(PassRegistry &);
 void initializeRegionPrinterPass(PassRegistry &);
 void initializeRegionViewerPass(PassRegistry &);
 void initializeRegisterCoalescerPass(PassRegistry &);
+void initializeRemoveLoadsIntoFakeUsesPass(PassRegistry &);
 void initializeRemoveRedundantDebugValuesPass(PassRegistry &);
 void initializeRenameIndependentSubregsPass(PassRegistry &);
 void initializeReplaceWithVeclibLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 05baf514fa7210..b710b1c46f643f 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -250,6 +250,7 @@ DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass
 DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass)
 DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass)
 DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass)
+DUMMY_MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", RemoveLoadsIntoFakeUsesPass)
 DUMMY_MACHINE_FUNCTION_PASS("removeredundantdebugvalues", RemoveRedundantDebugValuesPass)
 DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass)
 DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass)
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 9fb6de49fb2055..635c265a433631 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -217,6 +217,9 @@ HANDLE_TARGET_OPCODE(PATCHABLE_TYPED_EVENT_CALL)
 
 HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
 
+/// Represents a use of the operand but generates no code.
+HANDLE_TARGET_OPCODE(FAKE_USE)
+
 // This is a fence with the singlethread scope. It represents a compiler memory
 // barrier, but does not correspond to any generated instruction.
 HANDLE_TARGET_OPCODE(MEMBARRIER)
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 34332386085870..b2eb250ae60b60 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1418,6 +1418,16 @@ def FAULTING_OP : StandardPseudoInstruction {
   let isTerminator = true;
   let isBranch = true;
 }
+def FAKE_USE : StandardPseudoInstruction {
+  // An instruction that uses its operands but does nothing; this instruction
+  // will be treated specially by CodeGen passes, distinguishing it from any
+  // otherwise equivalent instructions.
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
+  let AsmString = "FAKE_USE";
+  let hasSideEffects = 0;
+  let isMeta = true;
+}
 def PATCHABLE_OP : StandardPseudoInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
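
As a rough sketch (register name and kill flag illustrative), a selected fake
use of a value held in a physical register renders in MIR as:

  FAKE_USE killed renamable $edi
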
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index 128060ec912c76..f77b733c6c8f69 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -567,7 +567,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM,
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
       if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
           II->getIntrinsicID() == Intrinsic::assume ||
-          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
+          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl ||
+          II->getIntrinsicID() == Intrinsic::fake_use)
         continue;
     if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
         !isSafeToSpeculativelyExecute(&*BBI))
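
For illustration (a sketch; names hypothetical), the intervening fake use here
no longer prevents the call from being in tail-call position:

  %ret = tail call i32 @callee(i32 %a)
  call void (...) @llvm.fake.use(i32 %a)
  ret i32 %ret
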
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 60cb26973ead41..19d23c8ba96783 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1131,6 +1131,21 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
   AP.OutStreamer->addBlankLine();
 }
 
+static void emitFakeUse(const MachineInstr *MI, AsmPrinter &AP) {
+  std::string Str;
+  raw_string_ostream OS(Str);
+  OS << "fake_use:";
+  for (const MachineOperand &Op : MI->operands()) {
+    // In some circumstances we can end up with fake uses of constants; skip
+    // these.
+    if (!Op.isReg())
+      continue;
+    OS << ' ' << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
+  }
+  AP.OutStreamer->AddComment(OS.str());
+  AP.OutStreamer->addBlankLine();
+}
+
 /// emitDebugValueComment - This method handles the target-independent form
 /// of DBG_VALUE, returning true if it was able to do so.  A false return
 /// means the target will need to handle MI in EmitInstruction.
@@ -1799,6 +1814,10 @@ void AsmPrinter::emitFunctionBody() {
       case TargetOpcode::KILL:
         if (isVerbose()) emitKill(&MI, *this);
         break;
+      case TargetOpcode::FAKE_USE:
+        if (isVerbose())
+          emitFakeUse(&MI, *this);
+        break;
       case TargetOpcode::PSEUDO_PROBE:
         emitPseudoProbe(MI);
         break;
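
With verbose assembly enabled, this prints a comment rather than an
instruction, along these lines (comment prefix and register name vary by
target):

  # fake_use: $rdi
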
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f1607f85c5b319..ae12ce1170f703 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -200,6 +200,7 @@ add_llvm_component_library(LLVMCodeGen
   RegisterUsageInfo.cpp
   RegUsageInfoCollector.cpp
   RegUsageInfoPropagate.cpp
+  RemoveLoadsIntoFakeUses.cpp
   ReplaceWithVeclib.cpp
   ResetMachineFunctionPass.cpp
   RegisterBank.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 31fa4c105cef80..177702054a0e31 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -116,6 +116,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeRegUsageInfoCollectorPass(Registry);
   initializeRegUsageInfoPropagationPass(Registry);
   initializeRegisterCoalescerPass(Registry);
+  initializeRemoveLoadsIntoFakeUsesPass(Registry);
   initializeRemoveRedundantDebugValuesPass(Registry);
   initializeRenameIndependentSubregsPass(Registry);
   initializeSafeStackLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index da6c758d53d487..271a047fc6a7b8 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2800,12 +2800,34 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
     return false;
   };
 
+  SmallVector<const IntrinsicInst *, 4> FakeUses;
+
+  auto isFakeUse = [&FakeUses](const Instruction *Inst) {
+    if (auto *II = dyn_cast<IntrinsicInst>(Inst);
+        II && II->getIntrinsicID() == Intrinsic::fake_use) {
+      // Record the instruction so it can be preserved when the exit block is
+      // removed. Do not preserve fake uses that use the result of a PHI node.
+      // FIXME: If we do want to copy the fake use into the return blocks, we
+      // have to figure out which of the PHI node operands to use for each
+      // copy.
+      if (!isa<PHINode>(II->getOperand(0))) {
+        FakeUses.push_back(II);
+      }
+      return true;
+    }
+
+    return false;
+  };
+
   // Make sure there are no instructions between the first instruction
   // and return.
   const Instruction *BI = BB->getFirstNonPHI();
   // Skip over debug and the bitcast.
   while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
-         isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
+         isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI) ||
+         isFakeUse(BI))
     BI = BI->getNextNode();
   if (BI != RetI)
     return false;
@@ -2814,6 +2836,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
   /// call.
   const Function *F = BB->getParent();
   SmallVector<BasicBlock *, 4> TailCallBBs;
+  // Record the call instructions so we can insert any fake uses
+  // that need to be preserved before them.
+  SmallVector<CallInst *, 4> CallInsts;
   if (PN) {
     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
       // Look through bitcasts.
@@ -2825,6 +2850,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
           TLI->mayBeEmittedAsTailCall(CI) &&
           attributesPermitTailCall(F, CI, RetI, *TLI)) {
         TailCallBBs.push_back(PredBB);
+        CallInsts.push_back(CI);
       } else {
         // Consider the cases in which the phi value is indirectly produced by
         // the tail call, for example when encountering memset(), memmove(),
@@ -2844,8 +2870,10 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
             isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
             IncomingVal == CI->getArgOperand(0) &&
             TLI->mayBeEmittedAsTailCall(CI) &&
-            attributesPermitTailCall(F, CI, RetI, *TLI))
+            attributesPermitTailCall(F, CI, RetI, *TLI)) {
           TailCallBBs.push_back(PredBB);
+          CallInsts.push_back(CI);
+        }
       }
     }
   } else {
@@ -2863,6 +2891,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
               (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
                V == CI->getArgOperand(0))) {
             TailCallBBs.push_back(Pred);
+            CallInsts.push_back(CI);
           }
         }
       }
@@ -2889,8 +2918,17 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
   }
 
   // If we eliminated all predecessors of the block, delete the block now.
-  if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
+  if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
+    // Copy the fake uses found in the original return block to all blocks
+    // that contain tail calls.
+    for (auto *CI : CallInsts) {
+      for (auto const *FakeUse : FakeUses) {
+        auto *ClonedInst = FakeUse->clone();
+        ClonedInst->insertBefore(CI);
+      }
+    }
     BB->eraseFromParent();
+  }
 
   return Changed;
 }
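
For illustration (a sketch; names hypothetical, with %v defined before the
branch), the recorded fake uses are cloned ahead of each duplicated tail call
when the shared return block goes away:

  ; before
  bb1:
    %r1 = tail call i32 @f()
    br label %exit
  exit:
    %r = phi i32 [ %r1, %bb1 ]
    call void (...) @llvm.fake.use(i32 %v)
    ret i32 %r

  ; after
  bb1:
    call void (...) @llvm.fake.use(i32 %v)
    %r1 = tail call i32 @f()
    ret i32 %r1
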
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 7fc25cd889a0df..332ed37bd2b79f 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -87,7 +87,8 @@ bool DeadMachineInstructionElimImpl::isDead(const MachineInstr *MI) const {
     return false;
 
   // Don't delete frame allocation labels.
-  if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+  if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE ||
+      MI->getOpcode() == TargetOpcode::FAKE_USE)
     return false;
 
   // Don't delete instructions with side effects.
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index f44af78cded46d..968d0a2a5c75e4 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2193,6 +2193,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     }
     return true;
   }
+  case Intrinsic::fake_use: {
+    SmallVector<llvm::SrcOp, 4> VRegs;
+    for (const auto &Arg : CI.args())
+      for (auto VReg : getOrCreateVRegs(*Arg))
+        VRegs.push_back(VReg);
+    MIRBuilder.buildInstr(TargetOpcode::FAKE_USE, std::nullopt, VRegs);
+    return true;
+  }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
     assert(DI.getVariable() && "Missing variable");
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cfdd9905c16fa6..b1270e7aeb875c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -228,6 +228,9 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
   // Don't delete frame allocation labels.
   if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
     return false;
+  // Don't delete fake uses.
+  if (MI.getOpcode() == TargetOpcode::FAKE_USE)
+    return false;
   // LIFETIME markers should be preserved even if they seem dead.
   if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
       MI.getOpcode() == TargetOpcode::LIFETIME_END)
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index 27bbf5599b6046..aadc54b495fe22 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -406,7 +406,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
 
 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
   if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
-      MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo())
+      MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
+      MI->isFakeUse())
     return false;
 
   // Ignore copies.
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 7a3cf96ccffe0a..4e6d34346b1d80 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -530,7 +530,8 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                             MachineBasicBlock *MBB,
                             MachineFunction *MF,
                             const TargetInstrInfo *TII) {
-  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
+  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
+         MI->isFakeUse();
 }
 
 /// A region of an MBB for scheduling.
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index fe515ef5be541f..609f9af9767f5d 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -833,7 +833,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     if (!ProcessedBegin)
       --I;
 
-    if (MI.isDebugOrPseudoInstr()) {
+    if (MI.isDebugOrPseudoInstr() || MI.isFakeUse()) {
       if (MI.isDebugValue())
         ProcessDbgInst(MI);
       continue;
diff --git a/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp b/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp
new file mode 100644
index 00000000000000..232181a199b8c2
--- /dev/null
+++ b/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp
@@ -0,0 +1,162 @@
+//===---- RemoveLoadsIntoFakeUses.cpp - Remove loads with no real uses ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The FAKE_USE instruction is used to preserve certain values through
+/// optimizations for the sake of debugging. This may result in spilled values
+/// being loaded into registers that are only used by FAKE_USEs; this is not
+/// necessary for debugging purposes, because at that point the value must be on
+/// the stack and hence available for debugging. Therefore, this pass removes
+/// loads that are only used by FAKE_USEs.
+///
+/// This pass should run very late, to ensure that we don't inadvertently
+/// shorten stack lifetimes by removing these loads, since the FAKE_USEs will
+/// also no longer be in effect. Running immediately before LiveDebugValues
+/// ensures that LDV will have accurate information about the machine location of
+/// debug values.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "remove-loads-into-fake-uses"
+
+STATISTIC(NumLoadsDeleted, "Number of dead load instructions deleted");
+STATISTIC(NumFakeUsesDeleted, "Number of FAKE_USE instructions deleted");
+
+class RemoveLoadsIntoFakeUses : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RemoveLoadsIntoFakeUses() : MachineFunctionPass(ID) {
+    initializeRemoveLoadsIntoFakeUsesPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+
+  StringRef getPassName() const override {
+    return "Remove Loads Into Fake Uses";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+char RemoveLoadsIntoFakeUses::ID = 0;
+char &llvm::RemoveLoadsIntoFakeUsesID = RemoveLoadsIntoFakeUses::ID;
+
+INITIALIZE_PASS_BEGIN(RemoveLoadsIntoFakeUses, DEBUG_TYPE,
+                      "Remove Loads Into Fake Uses", false, false)
+INITIALIZE_PASS_END(RemoveLoadsIntoFakeUses, DEBUG_TYPE,
+                    "Remove Loads Into Fake Uses", false, false)
+
+bool RemoveLoadsIntoFakeUses::runOnMachineFunction(MachineFunction &MF) {
+  // Only `optdebug` functions should contain FAKE_USEs, so don't try to run
+  // this for other functions.
+  if (!MF.getFunction().hasFnAttribute(Attribute::OptimizeForDebugging) ||
+      skipFunction(MF.getFunction()))
+    return false;
+
+  bool AnyChanges = false;
+
+  LiveRegUnits LivePhysRegs;
+  const MachineRegisterInfo *MRI = &MF.getRegInfo();
+  const TargetSubtargetInfo &ST = MF.getSubtarget();
+  const TargetInstrInfo *TII = ST.getInstrInfo();
+  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+  SmallDenseMap<Register, SmallVector<MachineInstr *>> RegFakeUses;
+  LivePhysRegs.init(*TRI);
+  for (MachineBasicBlock *MBB : post_order(&MF)) {
+    LivePhysRegs.addLiveOuts(*MBB);
+
+    for (MachineInstr &MI : make_early_inc_range(reverse(*MBB))) {
+      if (MI.isFakeUse()) {
+        for (const MachineOperand &MO : MI.operands()) {
+          // Track the Fake Uses that use this register so that we can delete
+          // them if we delete the corresponding load.
+          if (MO.isReg())
+            RegFakeUses[MO.getReg()].push_back(&MI);
+        }
+        // Do not record FAKE_USE uses in LivePhysRegs so that we can recognize
+        // otherwise-unused loads.
+        continue;
+      }
+
+      // If the restore size is not std::nullopt then we are dealing with a
+      // reload of a spilled register.
+      if (MI.getRestoreSize(TII)) {
+        Register Reg = MI.getOperand(0).getReg();
+        assert(Reg.isPhysical() && "VReg seen in function with NoVRegs set?");
+        // Don't delete live physreg defs, or any reserved register defs.
+        if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg))
+          continue;
+        // There should be an exact match between the loaded register and the
+        // FAKE_USE use. If not, this is a load that is not used by anything;
+        // it should probably be deleted, but that is outside this pass's scope.
+        if (RegFakeUses.contains(Reg)) {
+          LLVM_DEBUG(dbgs() << "RemoveLoadsIntoFakeUses: DELETING: " << MI);
+          // It is possible that some DBG_VALUE instructions refer to this
+          // instruction. They will be deleted in the live debug variable
+          // analysis.
+          MI.eraseFromParent();
+          AnyChanges = true;
+          ++NumLoadsDeleted;
+          // Each FAKE_USE now appears to be a fake use of the previous value
+          // of the loaded register; delete them to avoid incorrectly
+          // interpreting them as such.
+          for (MachineInstr *FakeUse : RegFakeUses[Reg]) {
+            LLVM_DEBUG(dbgs()
+                       << "RemoveLoadsIntoFakeUses: DELETING: " << *FakeUse);
+            FakeUse->eraseFromParent();
+          }
+          NumFakeUsesDeleted += RegFakeUses[Reg].size();
+          RegFakeUses[Reg].clear();
+        }
+        continue;
+      }
+
+      // In addition to tracking LivePhysRegs, we need to clear RegFakeUses each
+      // time a register is defined, as existing FAKE_USEs no longer apply to
+      // that register.
+      if (!RegFakeUses.empty()) {
+        for (const MachineOperand &MO : MI.operands()) {
+          if (MO.isReg() && MO.isDef()) {
+            Register Reg = MO.getReg();
+            assert(Reg.isPhysical() &&
+                   "VReg seen in function with NoVRegs set?");
+            for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true);
+                 AI.isValid(); ++AI)
+              RegFakeUses.erase(*AI);
+          }
+        }
+      }
+      LivePhysRegs.stepBackward(MI);
+    }
+  }
+
+  return AnyChanges;
+}
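
A sketch of the pattern the pass removes (X86 MIR; stack slot and register
names illustrative):

  renamable $rcx = MOV64rm $rbp, 1, $noreg, -8, $noreg :: (load (s64) from %stack.0)
  FAKE_USE killed renamable $rcx

Both instructions go away: the value still lives in %stack.0 and remains
visible to the debugger there, so reloading it only to feed the FAKE_USE
serves no purpose.
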
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 067f82c99adca1..162af2d9d708a9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1464,6 +1464,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
     updateValueMap(II, ResultReg);
     return true;
   }
+  case Intrinsic::fake_use:
+    // At -O0, we don't need fake uses, so just ignore them.
+    return true;
   case Intrinsic::experimental_stackmap:
     return selectStackmap(II);
   case Intrinsic::experimental_patchpoint_void:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 221dcfe145594f..b5c80005a0ecc1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2438,6 +2438,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
       report_fatal_error("Do not know how to promote this operator's operand!");
 
     case ISD::BITCAST:    R = PromoteFloatOp_BITCAST(N, OpNo); break;
+    case ISD::FAKE_USE:
+      R = PromoteFloatOp_FAKE_USE(N, OpNo);
+      break;
     case ISD::FCOPYSIGN:  R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
     case ISD::FP_TO_SINT:
     case ISD::FP_TO_UINT:
@@ -2480,6 +2483,13 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
   return DAG.getBitcast(N->getValueType(0), Convert);
 }
 
+SDValue DAGTypeLegalizer::PromoteFloatOp_FAKE_USE(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+  SDValue Op = GetPromotedFloat(N->getOperand(OpNo));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, N->getOperand(0),
+                     Op);
+}
+
 // Promote Operand 1 of FCOPYSIGN.  Operand 0 ought to be handled by
 // PromoteFloatRes_FCOPYSIGN.
 SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
@@ -3433,6 +3443,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
                        "operand!");
 
   case ISD::BITCAST:    Res = SoftPromoteHalfOp_BITCAST(N); break;
+  case ISD::FAKE_USE:
+    Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo);
+    break;
   case ISD::FCOPYSIGN:  Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
@@ -3473,6 +3486,13 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_BITCAST(SDNode *N) {
   return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
 }
 
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+  SDValue Op = GetSoftPromotedHalf(N->getOperand(OpNo));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, N->getOperand(0),
+                     Op);
+}
+
 SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
                                                       unsigned OpNo) {
   assert(OpNo == 1 && "Only Operand 1 must need promotion here");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index c19a5a4995627a..05971152d535c6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1934,6 +1934,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
   case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
   case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::FAKE_USE:
+    Res = PromoteIntOp_FAKE_USE(N);
+    break;
   case ISD::INSERT_VECTOR_ELT:
     Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);
     break;
@@ -5280,6 +5283,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::BR_CC:             Res = ExpandIntOp_BR_CC(N); break;
   case ISD::BUILD_VECTOR:      Res = ExpandOp_BUILD_VECTOR(N); break;
   case ISD::EXTRACT_ELEMENT:   Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+  case ISD::FAKE_USE:
+    Res = ExpandOp_FAKE_USE(N);
+    break;
   case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
   case ISD::SCALAR_TO_VECTOR:  Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
   case ISD::EXPERIMENTAL_VP_SPLAT:
@@ -6115,6 +6121,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
   return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
 }
 
+// FIXME: We wouldn't need this if clang could promote short integers
+// that are arguments to FAKE_USE.
+SDValue DAGTypeLegalizer::PromoteIntOp_FAKE_USE(SDNode *N) {
+  SDLoc dl(N);
+  SDValue V0 = N->getOperand(0);
+  SDValue V1 = N->getOperand(1);
+  EVT InVT1 = V1.getValueType();
+  SDValue VPromoted =
+      DAG.getNode(ISD::ANY_EXTEND, dl,
+                  TLI.getTypeToTransformTo(*DAG.getContext(), InVT1), V1);
+  return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), V0, VPromoted);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
   SDLoc dl(N);
   SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 1088db4bdbe0b3..4577346a02d605 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -391,6 +391,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
+  SDValue PromoteIntOp_FAKE_USE(SDNode *N);
   SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
   SDValue PromoteIntOp_ScalarOp(SDNode *N);
   SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
@@ -755,6 +756,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
 
   bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo);
@@ -800,6 +802,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
 
   bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_BITCAST(SDNode *N);
+  SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
   SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
@@ -877,6 +880,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
   SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N);
   SDValue ScalarizeVecOp_CMP(SDNode *N);
+  SDValue ScalarizeVecOp_FAKE_USE(SDNode *N);
 
   //===--------------------------------------------------------------------===//
   // Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -964,6 +968,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
+  SDValue SplitVecOp_FAKE_USE(SDNode *N);
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo);
@@ -1069,6 +1074,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N);
+  SDValue WidenVecOp_FAKE_USE(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
   SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
@@ -1198,6 +1204,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ExpandOp_BITCAST          (SDNode *N);
   SDValue ExpandOp_BUILD_VECTOR     (SDNode *N);
   SDValue ExpandOp_EXTRACT_ELEMENT  (SDNode *N);
+  SDValue ExpandOp_FAKE_USE(SDNode *N);
   SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
   SDValue ExpandOp_NormalStore      (SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a55364ea2c4e5b..b402e823762764 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -403,6 +403,17 @@ SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
   return N->getConstantOperandVal(1) ? Hi : Lo;
 }
 
+// Split the integer operand in two and create a second FAKE_USE node for
+// the other half. The original SDNode is updated in place.
+SDValue DAGTypeLegalizer::ExpandOp_FAKE_USE(SDNode *N) {
+  SDValue Lo, Hi;
+  SDValue Chain = N->getOperand(0);
+  GetExpandedOp(N->getOperand(1), Lo, Hi);
+  SDValue LoUse = DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Lo);
+  DAG.UpdateNodeOperands(N, LoUse, Hi);
+  return SDValue(N, 0);
+}
+
 SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
   // The vector type is legal but the element type needs expansion.
   EVT VecVT = N->getValueType(0);
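
Schematically, on a target where i64 is illegal (node numbering illustrative),
one fake_use node becomes two chained uses of the halves, with the original
node updated in place to use the high half:

  t2: ch = fake_use t1, t0:i64
    -->
  t3: ch = fake_use t1, lo(t0):i32
  t2: ch = fake_use t3, hi(t0):i32
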
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 475d5806467d98..4c6da7c5df6b40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -746,6 +746,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::BITCAST:
     Res = ScalarizeVecOp_BITCAST(N);
     break;
+  case ISD::FAKE_USE:
+    Res = ScalarizeVecOp_FAKE_USE(N);
+    break;
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::SIGN_EXTEND:
@@ -846,6 +849,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
                      N->getValueType(0), Elt);
 }
 
+// Need to legalize vector operands of fake uses. Must be <1 x ty>.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FAKE_USE(SDNode *N) {
+  assert(N->getOperand(1).getValueType().getVectorNumElements() == 1 &&
+         "Fake Use: Unexpected vector type!");
+  SDValue Elt = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Elt);
+}
+
 /// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
 /// Do the operation on the element instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
@@ -3291,6 +3302,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     Res = SplitVecOp_CMP(N);
     break;
 
+  case ISD::FAKE_USE:
+    Res = SplitVecOp_FAKE_USE(N);
+    break;
   case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::SIGN_EXTEND_VECTOR_INREG:
   case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -3505,6 +3519,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
 }
 
+// Split a FAKE_USE use of a vector into FAKE_USEs of hi and lo part.
+SDValue DAGTypeLegalizer::SplitVecOp_FAKE_USE(SDNode *N) {
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(1), Lo, Hi);
+  SDValue Chain =
+      DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Lo);
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Hi);
+}
+
 SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
   // For example, i64 = BITCAST v4i16 on alpha.  Typically the vector will
   // end up being split all the way down to individual components.  Convert the
@@ -6466,6 +6489,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
     report_fatal_error("Do not know how to widen this operator's operand!");
 
   case ISD::BITCAST:            Res = WidenVecOp_BITCAST(N); break;
+  case ISD::FAKE_USE:
+    Res = WidenVecOp_FAKE_USE(N);
+    break;
   case ISD::CONCAT_VECTORS:     Res = WidenVecOp_CONCAT_VECTORS(N); break;
   case ISD::INSERT_SUBVECTOR:   Res = WidenVecOp_INSERT_SUBVECTOR(N); break;
   case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
@@ -6851,6 +6877,16 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
   return CreateStackStoreLoad(InOp, VT);
 }
 
+// Vectors with sizes that are not powers of 2 need to be widened to the
+// next largest power of 2. For example, we may get a vector of 3 32-bit
+// integers or of 6 16-bit integers, both of which have to be widened to a
+// 128-bit vector.
+SDValue DAGTypeLegalizer::WidenVecOp_FAKE_USE(SDNode *N) {
+  SDValue WidenedOp = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0),
+                     WidenedOp);
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
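
Schematically (types illustrative), the vector cases rewrite a fake use as:

  fake_use of v8i32  -->  fake_use of lo(v4i32), chained fake_use of hi(v4i32)
  fake_use of v3i32  -->  fake_use of v4i32 (operand widened)
  fake_use of v1i32  -->  fake_use of i32 (operand scalarized)
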
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ad24704d940a36..521a4fee8aafe0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1622,6 +1622,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
     SDValue N = NodeMap[V];
     if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
       N = UnusedArgNodeMap[V];
+
     if (N.getNode()) {
       // Only emit func arg dbg value for non-variadic dbg.values for now.
       if (!IsVariadic &&
@@ -7703,6 +7704,38 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     return;
   }
 
+  case Intrinsic::fake_use: {
+    Value *V = I.getArgOperand(0);
+    SDValue Ops[2];
+    // For Values not declared or previously used in this basic block, the
+    // NodeMap will not have an entry, and `getValue` will assert if V has no
+    // valid register value.
+    auto FakeUseValue = [&]() -> SDValue {
+      SDValue &N = NodeMap[V];
+      if (N.getNode())
+        return N;
+
+      // If there's a virtual register allocated and initialized for this
+      // value, use it.
+      if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
+        return copyFromReg;
+      // FIXME: Do we want to preserve constants? It seems pointless.
+      if (isa<Constant>(V))
+        return getValue(V);
+      return SDValue();
+    }();
+    // Do not translate a fake use with an undef operand or any other empty
+    // SDValue.
+    if (!FakeUseValue || FakeUseValue.isUndef())
+      return;
+    Ops[0] = getRoot();
+    Ops[1] = FakeUseValue;
+    DAG.setRoot(DAG.getNode(ISD::FAKE_USE, sdl, MVT::Other, Ops));
+    return;
+  }
+
   case Intrinsic::eh_exceptionpointer:
   case Intrinsic::eh_exceptioncode: {
     // Get the exception pointer vreg, copy from it, and resize it to fit.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 001f782f209fdb..a253d1a0e20170 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -454,6 +454,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::UBSANTRAP:                  return "ubsantrap";
   case ISD::LIFETIME_START:             return "lifetime.start";
   case ISD::LIFETIME_END:               return "lifetime.end";
+  case ISD::FAKE_USE:
+    return "fake_use";
   case ISD::PSEUDO_PROBE:
     return "pseudoprobe";
   case ISD::GC_TRANSITION_START:        return "gc_transition.start";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 09bde54b9aaa5d..8e268d4f4968ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -804,6 +804,50 @@ static void reportFastISelFailure(MachineFunction &MF,
   LLVM_DEBUG(dbgs() << R.getMsg() << "\n");
 }
 
+// Detect any fake uses that follow a tail call and move them before the tail
+// call. Ignore fake uses that use values that are def'd by or after the tail
+// call.
+static void preserveFakeUses(BasicBlock::iterator Begin,
+                             BasicBlock::iterator End) {
+  BasicBlock::iterator I = End;
+  if (--I == Begin || !isa<ReturnInst>(*I))
+    return;
+  // Detect whether there are any fake uses trailing a (potential) tail call.
+  bool HaveFakeUse = false;
+  bool HaveTailCall = false;
+  do {
+    if (const CallInst *CI = dyn_cast<CallInst>(--I))
+      if (CI->isTailCall()) {
+        HaveTailCall = true;
+        break;
+      }
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      if (II->getIntrinsicID() == Intrinsic::fake_use)
+        HaveFakeUse = true;
+  } while (I != Begin);
+
+  // If we didn't find any tail calls followed by fake uses, we are done.
+  if (!HaveTailCall || !HaveFakeUse)
+    return;
+
+  SmallVector<IntrinsicInst *> FakeUses;
+  // Record the fake uses we found so we can move them to the front of the
+  // tail call. Ignore them if they use a value that is def'd by or after
+  // the tail call.
+  for (BasicBlock::iterator Inst = I; Inst != End; Inst++) {
+    if (IntrinsicInst *FakeUse = dyn_cast<IntrinsicInst>(Inst);
+        FakeUse && FakeUse->getIntrinsicID() == Intrinsic::fake_use) {
+      if (auto UsedDef = dyn_cast<Instruction>(FakeUse->getOperand(0));
+          !UsedDef || UsedDef->getParent() != I->getParent() ||
+          UsedDef->comesBefore(&*I))
+        FakeUses.push_back(FakeUse);
+    }
+  }
+
+  for (auto *Inst : FakeUses)
+    Inst->moveBefore(*Inst->getParent(), I);
+}
+
 void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
                                         BasicBlock::const_iterator End,
                                         bool &HadTailCall) {
@@ -1665,6 +1709,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       FuncInfo->VisitedBBs[LLVMBB->getNumber()] = true;
     }
 
+    // Fake uses that follow tail calls are dropped. To avoid this, move
+    // such fake uses in front of the tail call, provided they don't
+    // use anything def'd by or after the tail call.
+    {
+      BasicBlock::iterator BBStart =
+          const_cast<BasicBlock *>(LLVMBB)->getFirstNonPHI()->getIterator();
+      BasicBlock::iterator BBEnd = const_cast<BasicBlock *>(LLVMBB)->end();
+      preserveFakeUses(BBStart, BBEnd);
+    }
+
     BasicBlock::const_iterator const Begin =
         LLVMBB->getFirstNonPHI()->getIterator();
     BasicBlock::const_iterator const End = LLVMBB->end();
@@ -2448,6 +2502,13 @@ void SelectionDAGISel::Select_UNDEF(SDNode *N) {
   CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
 }
 
+// Use the generic FAKE_USE target opcode. The chain operand
+// must come last, because InstrEmitter::AddOperand() requires it.
+void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
+  CurDAG->SelectNodeTo(N, TargetOpcode::FAKE_USE, N->getValueType(0),
+                       N->getOperand(1), N->getOperand(0));
+}
+
 void SelectionDAGISel::Select_FREEZE(SDNode *N) {
   // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
   // If FREEZE instruction is added later, the code below must be changed as
@@ -3219,6 +3280,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
   case ISD::UNDEF:
     Select_UNDEF(NodeToMatch);
     return;
+  case ISD::FAKE_USE:
+    Select_FAKE_USE(NodeToMatch);
+    return;
   case ISD::FREEZE:
     Select_FREEZE(NodeToMatch);
     return;
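
For illustration (a sketch; names hypothetical), preserveFakeUses rewrites:

  ; before
  %r = tail call i32 @f(i32 %x)
  call void (...) @llvm.fake.use(i32 %x)
  ret i32 %r

  ; after
  call void (...) @llvm.fake.use(i32 %x)
  %r = tail call i32 @f(i32 %x)
  ret i32 %r

A fake use of %r itself is not moved (and is subsequently dropped), since %r
is defined by the tail call and cannot be made live before it.
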
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 1d52ebe6717f04..c0b834650d73b0 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1206,6 +1206,7 @@ void TargetPassConfig::addMachinePasses() {
   // addPreEmitPass.  Maybe only pass "false" here for those targets?
   addPass(&FuncletLayoutID);
 
+  addPass(&RemoveLoadsIntoFakeUsesID);
   addPass(&StackMapLivenessID);
   addPass(&LiveDebugValuesID);
   addPass(&MachineSanitizerBinaryMetadataID);
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 6f0f3f244c050c..62d88ce21657b2 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -1171,7 +1171,10 @@ Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const {
 const Instruction *
 Instruction::getPrevNonDebugInstruction(bool SkipPseudoOp) const {
   for (const Instruction *I = getPrevNode(); I; I = I->getPrevNode())
-    if (!isa<DbgInfoIntrinsic>(I) && !(SkipPseudoOp && isa<PseudoProbeInst>(I)))
+    if (!isa<DbgInfoIntrinsic>(I) &&
+        !(SkipPseudoOp && isa<PseudoProbeInst>(I)) &&
+        !(isa<IntrinsicInst>(I) &&
+          cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fake_use))
       return I;
   return nullptr;
 }
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 2c0f10a34f919d..79b3ca3b6a5a7e 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5128,6 +5128,7 @@ void Verifier::visitInstruction(Instruction &I) {
                 F->getIntrinsicID() ==
                     Intrinsic::experimental_patchpoint_void ||
                 F->getIntrinsicID() == Intrinsic::experimental_patchpoint ||
+                F->getIntrinsicID() == Intrinsic::fake_use ||
                 F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
                 F->getIntrinsicID() == Intrinsic::wasm_rethrow ||
                 IsAttachedCallOperand(F, CBI, i),
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 097e29527eed9f..e86d3771bd2f26 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -315,6 +315,7 @@ void NVPTXPassConfig::addIRPasses() {
   disablePass(&FuncletLayoutID);
   disablePass(&PatchableFunctionID);
   disablePass(&ShrinkWrapID);
+  disablePass(&RemoveLoadsIntoFakeUsesID);
 
   addPass(createNVPTXAAWrapperPass());
   addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index 48a2ce89bad390..7058b15d53aa0b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -140,6 +140,7 @@ void SPIRVPassConfig::addPostRegAlloc() {
   disablePass(&ShrinkWrapID);
   disablePass(&LiveDebugValuesID);
   disablePass(&MachineLateInstrsCleanupID);
+  disablePass(&RemoveLoadsIntoFakeUsesID);
 
   // Do not work with OpPhi.
   disablePass(&BranchFolderPassID);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 23539a5f4b26f1..73765f8fa0092c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -552,6 +552,7 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
   disablePass(&StackMapLivenessID);
   disablePass(&PatchableFunctionID);
   disablePass(&ShrinkWrapID);
+  disablePass(&RemoveLoadsIntoFakeUsesID);
 
   // This pass hurts code size for wasm because it can generate irreducible
   // control flow.
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 02c3ca9839fc2d..ea94a4be32b2fa 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -432,6 +432,24 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
     if (MI.isCall())
       FPInstClass = X86II::SpecialFP;
 
+    // A fake_use with a floating point pseudo register argument that is
+    // killed must behave like any other floating point operation and pop
+    // the floating point stack (this is done in handleSpecialFP()).
+    // Fake_use is, however, unusual in that sometimes its operand is not
+    // killed, because a later instruction (probably a return) will use it;
+    // that later instruction is the one that will pop the stack.
+    // In this scenario we can safely remove the fake_use's operand
+    // (it is live anyway).
+    if (MI.isFakeUse()) {
+      const MachineOperand &MO = MI.getOperand(0);
+      if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
+        if (MO.isKill())
+          FPInstClass = X86II::SpecialFP;
+        else
+          MI.removeOperand(0);
+      }
+    }
+
     if (FPInstClass == X86II::NotFP)
       continue;  // Efficiently ignore non-fp insts!
 
@@ -1737,6 +1755,20 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
     // Don't delete the inline asm!
     return;
   }
+
+  // FAKE_USE must pop its register operand off the stack if it is killed,
+  // because this constitutes the register's last use. If the operand
+  // is not killed, it will have its last use later, so we leave it alone.
+  // In either case we remove the operand so later passes don't see it.
+  case TargetOpcode::FAKE_USE: {
+    assert(MI.getNumExplicitOperands() == 1 &&
+           "FAKE_USE must have exactly one operand");
+    if (MI.getOperand(0).isKill()) {
+      freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
+    }
+    MI.removeOperand(0);
+    return;
+  }
   }
 
   Inst = MBB->erase(Inst);  // Remove the pseudo instruction
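
A hedged sketch of the two cases handled above (illustrative IR; names are
assumptions): in @live the fake use's x87 operand is also used by the later
return, so the FAKE_USE is not a kill and merely has its operand removed; in
@last the fake use is the operand's final use, so it is classified as
SpecialFP and pops the stack slot.

  define x86_fp80 @live(x86_fp80 %v) optdebug {
  entry:
    notail call void (...) @llvm.fake.use(x86_fp80 %v)
    ret x86_fp80 %v
  }

  define void @last(x86_fp80 %v) optdebug {
  entry:
    notail call void (...) @llvm.fake.use(x86_fp80 %v)
    ret void
  }

  declare void @llvm.fake.use(...)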
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 26b62cb79cdedf..2310cb3a7decbb 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3802,6 +3802,12 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
 
   struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
     AAMDNodes AATags;
+    // A vector to hold the split components that we want to emit
+    // separate fake uses for.
+    SmallVector<Value *, 4> Components;
+    // A vector to hold all the fake uses of the struct that we are splitting.
+    // Usually there should only be one, but we are handling the general case.
+    SmallVector<Instruction *, 1> FakeUses;
 
     LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
@@ -3826,10 +3832,32 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
           GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
         Load->setAAMetadata(
             AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
+      // Record the load so we can generate a fake use for this aggregate
+      // component.
+      Components.push_back(Load);
 
       Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
       LLVM_DEBUG(dbgs() << "          to: " << *Load << "\n");
     }
+
+    // Stash the fake uses that use the value generated by this instruction.
+    void recordFakeUses(LoadInst &LI) {
+      for (Use &U : LI.uses())
+        if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
+          if (II->getIntrinsicID() == Intrinsic::fake_use)
+            FakeUses.push_back(II);
+    }
+
+    // Replace all fake uses of the aggregate with a series of fake uses, one
+    // for each split component.
+    void emitFakeUses() {
+      for (Instruction *I : FakeUses) {
+        IRB.SetInsertPoint(I);
+        for (auto *V : Components)
+          IRB.CreateIntrinsic(Intrinsic::fake_use, {}, {V});
+        I->eraseFromParent();
+      }
+    }
   };
 
   bool visitLoadInst(LoadInst &LI) {
@@ -3841,8 +3869,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
     LLVM_DEBUG(dbgs() << "    original: " << LI << "\n");
     LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
                             getAdjustedAlignment(&LI, 0), DL, IRB);
+    Splitter.recordFakeUses(LI);
     Value *V = PoisonValue::get(LI.getType());
     Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
+    Splitter.emitFakeUses();
     Visited.erase(&LI);
     LI.replaceAllUsesWith(V);
     LI.eraseFromParent();
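
As a sketch of the rewrite (illustrative IR; the .fca value names are
assumptions based on the splitter's naming scheme), a single fake use of an
aggregate load such as

  %agg = load { i32, float }, ptr %p
  notail call void (...) @llvm.fake.use({ i32, float } %agg)

becomes one fake use per split component:

  %agg.fca.0.load = load i32, ptr %p
  %agg.fca.1.gep = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 1
  %agg.fca.1.load = load float, ptr %agg.fca.1.gep
  notail call void (...) @llvm.fake.use(i32 %agg.fca.0.load)
  notail call void (...) @llvm.fake.use(float %agg.fca.1.load)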
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 47e3c03288d979..dc9ca1423f3e79 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -513,6 +513,12 @@ void PruningFunctionCloner::CloneBlock(
   for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
        ++II) {
 
+    // Don't clone fake_use: when cloned into an inlined body it can
+    // suppress many optimizations in the caller, especially SROA.
+    if (auto *IntrInst = dyn_cast<IntrinsicInst>(II))
+      if (IntrInst->getIntrinsicID() == Intrinsic::fake_use)
+        continue;
+
     Instruction *NewInst = cloneInstruction(II);
     NewInst->insertInto(NewBB, NewBB->end());
 
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index e4809cd4bb44db..d0669e44f821b3 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3491,6 +3491,9 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
 
   unsigned Count = 0;
   for (Use &U : llvm::make_early_inc_range(From->uses())) {
+    auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+    if (II && II->getIntrinsicID() == Intrinsic::fake_use)
+      continue;
     if (!ShouldReplace(Root, U))
       continue;
     LLVM_DEBUG(dbgs() << "Replace dominated use of '";
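
A hedged sketch (illustrative IR; names are assumptions): if %x is known equal
to %y inside %then, replaceDominatedUsesWith may rewrite the add to use %y,
but the fake use is skipped and keeps referring to %x, so the debug-oriented
reference to the original value survives.

  define i32 @example(i32 %x, i32 %y) {
  entry:
    %cmp = icmp eq i32 %x, %y
    br i1 %cmp, label %then, label %exit

  then:
    %use = add i32 %x, 1
    notail call void (...) @llvm.fake.use(i32 %x)
    br label %exit

  exit:
    %r = phi i32 [ %use, %then ], [ 0, %entry ]
    ret i32 %r
  }

  declare void @llvm.fake.use(...)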
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 5251eb86bca926..1b7912fdf5e304 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -80,7 +80,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
       if (SI->isVolatile())
         return false;
     } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
-      if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
+      if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
+          II->getIntrinsicID() != Intrinsic::fake_use)
         return false;
     } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
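
A minimal sketch of what this permits (illustrative IR; names are
assumptions): %slot below now counts as promotable even though the fake use
refers to the alloca directly, so mem2reg is no longer blocked by it.

  define void @example(i32 %v) {
  entry:
    %slot = alloca i32, align 4
    store i32 %v, ptr %slot, align 4
    notail call void (...) @llvm.fake.use(ptr %slot)
    ret void
  }

  declare void @llvm.fake.use(...)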
diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll
index 2370fe1468b4ed..5d304d1569d3f6 100644
--- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll
+++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll
@@ -16,7 +16,7 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.inc, %for.body.lr.ph
   %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
   %add = add nsw i32 %i.02, 50, !dbg !16
-  call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !18, metadata !19), !dbg !20
+  tail call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !18, metadata !19), !dbg !20
   %idxprom = sext i32 %add to i64, !dbg !21
 
 ; CHECK:  %idxprom = sext i32 %add to i64
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index ba611493e1a76e..ec6f24a5650fa3 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -69,6 +69,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       Workaround A53 erratum 835769 pass
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 3465b717261cf5..ffbe3dd377109f 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -224,6 +224,7 @@
 ; CHECK-NEXT:       Machine Copy Propagation Pass
 ; CHECK-NEXT:       Workaround A53 erratum 835769 pass
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 29e8ebdafb5871..7bf1b8746fd87b 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -143,6 +143,7 @@
 ; GCN-O0-NEXT:        Post RA hazard recognizer
 ; GCN-O0-NEXT:        Branch relaxation pass
 ; GCN-O0-NEXT:        Register Usage Information Collector Pass
+; GCN-O0-NEXT:        Remove Loads Into Fake Uses
 ; GCN-O0-NEXT:        Live DEBUG_VALUE analysis
 ; GCN-O0-NEXT:        Machine Sanitizer Binary Metadata
 ; GCN-O0-NEXT:        Lazy Machine Block Frequency Analysis
@@ -420,6 +421,7 @@
 ; GCN-O1-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O1-NEXT:        Branch relaxation pass
 ; GCN-O1-NEXT:        Register Usage Information Collector Pass
+; GCN-O1-NEXT:        Remove Loads Into Fake Uses
 ; GCN-O1-NEXT:        Live DEBUG_VALUE analysis
 ; GCN-O1-NEXT:        Machine Sanitizer Binary Metadata
 ; GCN-O1-NEXT:        Lazy Machine Block Frequency Analysis
@@ -725,6 +727,7 @@
 ; GCN-O1-OPTS-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O1-OPTS-NEXT:        Branch relaxation pass
 ; GCN-O1-OPTS-NEXT:        Register Usage Information Collector Pass
+; GCN-O1-OPTS-NEXT:        Remove Loads Into Fake Uses
 ; GCN-O1-OPTS-NEXT:        Live DEBUG_VALUE analysis
 ; GCN-O1-OPTS-NEXT:        Machine Sanitizer Binary Metadata
 ; GCN-O1-OPTS-NEXT:        Lazy Machine Block Frequency Analysis
@@ -1036,6 +1039,7 @@
 ; GCN-O2-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O2-NEXT:        Branch relaxation pass
 ; GCN-O2-NEXT:        Register Usage Information Collector Pass
+; GCN-O2-NEXT:        Remove Loads Into Fake Uses
 ; GCN-O2-NEXT:        Live DEBUG_VALUE analysis
 ; GCN-O2-NEXT:        Machine Sanitizer Binary Metadata
 ; GCN-O2-NEXT:        Lazy Machine Block Frequency Analysis
@@ -1359,6 +1363,7 @@
 ; GCN-O3-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O3-NEXT:        Branch relaxation pass
 ; GCN-O3-NEXT:        Register Usage Information Collector Pass
+; GCN-O3-NEXT:        Remove Loads Into Fake Uses
 ; GCN-O3-NEXT:        Live DEBUG_VALUE analysis
 ; GCN-O3-NEXT:        Machine Sanitizer Binary Metadata
 ; GCN-O3-NEXT:        Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 9b983d96f79330..819623d3fcc5a3 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -193,6 +193,7 @@
 ; CHECK-NEXT:      ARM block placement
 ; CHECK-NEXT:      optimise barriers pass
 ; CHECK-NEXT:      Contiguously Lay Out Funclets
+; CHECK-NEXT:      Remove Loads Into Fake Uses
 ; CHECK-NEXT:      StackMap Liveness Analysis
 ; CHECK-NEXT:      Live DEBUG_VALUE analysis
 ; CHECK-NEXT:      Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
index 38c1dbcb1075fa..24bd4c75a9821e 100644
--- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
@@ -62,6 +62,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       Branch relaxation pass
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 50f154663177d0..53cdbd18f9b907 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -166,6 +166,7 @@
 ; LAXX-NEXT:       Implement the 'patchable-function' attribute
 ; LAXX-NEXT:       Branch relaxation pass
 ; LAXX-NEXT:       Contiguously Lay Out Funclets
+; LAXX-NEXT:       Remove Loads Into Fake Uses
 ; LAXX-NEXT:       StackMap Liveness Analysis
 ; LAXX-NEXT:       Live DEBUG_VALUE analysis
 ; LAXX-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir b/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
new file mode 100644
index 00000000000000..7eb8915f26a80f
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
@@ -0,0 +1,99 @@
+# In certain cases CodeGenPrepare folds a return instruction into
+# the return block's predecessor blocks and subsequently deletes the return block.
+# The purpose of this is to enable tail call optimization in the predecessor blocks.
+# Removal of the return block also removes fake use instructions that were present
+# in the return block, potentially causing debug information to be lost.
+#
+# The fix is to clone any fake use instructions that are not dominated by definitions
+# in the return block itself into the predecessor blocks. This test ensures that we do so.
+#
+# Generated from the following source with
+# clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-before=codegenprepare -o test.mir test.c
+#
+# extern int f0();
+# extern int f1();
+#
+# int foo(int i) {
+#   int temp = i;
+#   if (temp == 0)
+#     temp = f0();
+#   else
+#     temp = f1();
+#   return temp;
+# }
+#
+# RUN: llc -run-pass=codegenprepare -o - %s | FileCheck %s
+#
+# CHECK:      define{{.*}}foo
+# CHECK:      if.then:
+# CHECK-NEXT: call{{.*}}fake.use(i32 %i)
+# CHECK-NEXT: tail call i32{{.*}}@f0
+# CHECK-NEXT: ret
+# CHECK:      if.else:
+# CHECK-NEXT: call{{.*}}fake.use(i32 %i)
+# CHECK-NEXT: tail call i32{{.*}}@f1
+# CHECK-NEXT: ret
+
+--- |
+  define hidden i32 @foo(i32 %i) local_unnamed_addr optdebug {
+  entry:
+    %cmp = icmp eq i32 %i, 0
+    br i1 %cmp, label %if.then, label %if.else
+
+  if.then:
+    %call = tail call i32 (...) @f0()
+    br label %if.end
+
+  if.else:
+    %call1 = tail call i32 (...) @f1()
+    br label %if.end
+
+  if.end:
+    %temp.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
+    notail call void (...) @llvm.fake.use(i32 %temp.0)
+    notail call void (...) @llvm.fake.use(i32 %i)
+    ret i32 %temp.0
+  }
+  declare i32 @f0(...) local_unnamed_addr
+  declare i32 @f1(...) local_unnamed_addr
+
+...
+---
+name:            foo
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+
+...
diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
index 4a17384e499936..5853647bf3b9f3 100644
--- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
@@ -60,6 +60,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       PowerPC Pre-Emit Peephole
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index 39b23a57513d9d..21bd4bb8502c3d 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -214,6 +214,7 @@
 ; CHECK-NEXT:       PowerPC Pre-Emit Peephole
 ; CHECK-NEXT:       PowerPC Early-Return Creation
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 7473809a2c5d2e..84c7f3f987c065 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -63,6 +63,7 @@
 ; CHECK-NEXT:       Branch relaxation pass
 ; CHECK-NEXT:       RISC-V Make Compressible
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 44c270fdc3c257..5d14d14d216244 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -189,6 +189,7 @@
 ; CHECK-NEXT:       Branch relaxation pass
 ; CHECK-NEXT:       RISC-V Make Compressible
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 98b86384b8443d..4c99dd830b442e 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -71,6 +71,7 @@
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       X86 insert wait instruction
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/CodeGen/X86/fake-use-hpfloat.ll b/llvm/test/CodeGen/X86/fake-use-hpfloat.ll
new file mode 100644
index 00000000000000..7a95c38801837c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-hpfloat.ll
@@ -0,0 +1,15 @@
+; Check that a fake use of a half-precision float does not trigger an assert
+; in the DAG legalizer. Exercises the changes to half-float promotion.
+; RUN: llc -stop-after=finalize-isel -o - %s | FileCheck %s
+;
+; CHECK:      bb.0.entry:
+; CHECK-NEXT: %0:fr16 = FsFLD0SH
+; CHECK-NEXT: FAKE_USE killed %0
+;
+target triple = "x86_64-unknown-unknown"
+
+define void @_Z6doTestv() local_unnamed_addr optdebug {
+entry:
+  tail call void (...) @llvm.fake.use(half 0xH0000)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/fake-use-ld.ll b/llvm/test/CodeGen/X86/fake-use-ld.ll
new file mode 100644
index 00000000000000..86e7235091dd1c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-ld.ll
@@ -0,0 +1,43 @@
+; RUN: llc -O0 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; Checks that fake uses of the FP stack do not cause a crash.
+;
+; /*******************************************************************/
+; extern long double foo(long double, long double, long double);
+;
+; long double actual(long double p1, long double p2, long double p3) {
+;   return fmal(p1, p2, p3);
+; }
+; /*******************************************************************/
+
+define x86_fp80 @actual(x86_fp80 %p1, x86_fp80 %p2, x86_fp80 %p3) optdebug {
+;
+; CHECK: actual
+;
+entry:
+  %p1.addr = alloca x86_fp80, align 16
+  %p2.addr = alloca x86_fp80, align 16
+  %p3.addr = alloca x86_fp80, align 16
+  store x86_fp80 %p1, ptr %p1.addr, align 16
+  store x86_fp80 %p2, ptr %p2.addr, align 16
+  store x86_fp80 %p3, ptr %p3.addr, align 16
+  %0 = load x86_fp80, ptr %p1.addr, align 16
+  %1 = load x86_fp80, ptr %p2.addr, align 16
+  %2 = load x86_fp80, ptr %p3.addr, align 16
+;
+; CHECK: callq{{.*}}foo
+;
+  %3 = call x86_fp80 @foo(x86_fp80 %0, x86_fp80 %1, x86_fp80 %2)
+  %4 = load x86_fp80, ptr %p1.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %4)
+  %5 = load x86_fp80, ptr %p2.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %5)
+  %6 = load x86_fp80, ptr %p3.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %6)
+;
+; CHECK: ret
+;
+  ret x86_fp80 %3
+}
+
+declare x86_fp80 @foo(x86_fp80, x86_fp80, x86_fp80)
diff --git a/llvm/test/CodeGen/X86/fake-use-scheduler.mir b/llvm/test/CodeGen/X86/fake-use-scheduler.mir
new file mode 100644
index 00000000000000..7e55f1d79aa7b6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-scheduler.mir
@@ -0,0 +1,123 @@
+# Prevent the machine scheduler from moving instructions past FAKE_USE.
+# RUN: llc -run-pass machine-scheduler -debug-only=machine-scheduler 2>&1 -o - %s | FileCheck %s
+# REQUIRES: asserts
+#
+# We make sure that, beginning with the first FAKE_USE instruction,
+# the scheduler makes no changes to the sequence of instructions. We
+# don't bother to check that the order of the FAKE_USEs themselves
+# remains the same; it should, but that is irrelevant here.
+#
+# CHECK: ********** MI Scheduling **********
+# CHECK-NEXT: foo:%bb.0 entry
+# CHECK-NEXT:   From: %0:gr64 = COPY $rdi
+# CHECK-NEXT:     To: FAKE_USE %5:gr64
+# CHECK-NEXT:  RegionInstrs: 7
+#
+# CHECK: ********** MI Scheduling **********
+# CHECK-NEXT: bar:%bb.0 entry
+# CHECK-NEXT:   From: %0:gr64 = COPY $rdi
+# CHECK-NEXT:     To: RET 0, killed $rax
+# CHECK-NEXT:  RegionInstrs: 7
+#
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.ll"
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+  
+  @glb = common dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 16
+  
+  define dso_local i64 @foo(ptr %p) local_unnamed_addr optdebug {
+  entry:
+    %0 = load i32, ptr @glb, align 16
+    store i32 %0, ptr %p, align 4
+    %conv = sext i32 %0 to i64
+    %1 = load i32, ptr getelementptr inbounds ([100 x i32], ptr @glb, i64 0, i64 1), align 4
+    %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
+    store i32 %1, ptr %arrayidx1, align 4
+    %conv2 = sext i32 %1 to i64
+    %add3 = add nsw i64 %conv2, %conv
+    notail call void (...) @llvm.fake.use(i64 %add3)
+    notail call void (...) @llvm.fake.use(i32 %1)
+    notail call void (...) @llvm.fake.use(i32 %0)
+    notail call void (...) @llvm.fake.use(ptr %p)
+    ret i64 %add3
+  }
+  
+  define dso_local i64 @bar(ptr %p) local_unnamed_addr optdebug {
+  entry:
+    %0 = load i32, ptr @glb, align 16
+    store i32 %0, ptr %p, align 4
+    %conv = sext i32 %0 to i64
+    %1 = load i32, ptr getelementptr inbounds ([100 x i32], ptr @glb, i64 0, i64 1), align 4
+    %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
+    store i32 %1, ptr %arrayidx1, align 4
+    %conv2 = sext i32 %1 to i64
+    %add3 = add nsw i64 %conv2, %conv
+    ret i64 %add3
+  }
+  
+  ; Function Attrs: nocallback nofree nosync nounwind willreturn
+  declare void @llvm.stackprotector(ptr, ptr)
+  
+...
+---
+name:            foo
+alignment:       16
+tracksRegLiveness: true
+debugInstrRef:   true
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64_with_sub_8bit, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr64_with_sub_8bit, preferred-register: '' }
+  - { id: 4, class: gr32, preferred-register: '' }
+  - { id: 5, class: gr64, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+body:             |
+  bb.0.entry:
+    liveins: $rdi
+  
+    %0:gr64 = COPY $rdi
+    %1:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb, $noreg
+    MOV32mr %0, 1, $noreg, 0, $noreg, %1.sub_32bit
+    %3:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb + 4, $noreg
+    MOV32mr %0, 1, $noreg, 4, $noreg, %3.sub_32bit
+    %5:gr64 = COPY %3
+    %5:gr64 = nsw ADD64rr %5, %1, implicit-def dead $eflags
+    FAKE_USE %5
+    FAKE_USE %3.sub_32bit
+    FAKE_USE %1.sub_32bit
+    FAKE_USE %0
+    $rax = COPY %5
+    RET 0, killed $rax
+
+...
+---
+name:            bar
+alignment:       16
+tracksRegLiveness: true
+debugInstrRef:   true
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64_with_sub_8bit, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr64_with_sub_8bit, preferred-register: '' }
+  - { id: 4, class: gr32, preferred-register: '' }
+  - { id: 5, class: gr64_with_sub_8bit, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+body:             |
+  bb.0.entry:
+    liveins: $rdi
+  
+    %0:gr64 = COPY $rdi
+    %1:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb, $noreg
+    MOV32mr %0, 1, $noreg, 0, $noreg, %1.sub_32bit
+    %5:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb + 4, $noreg
+    MOV32mr %0, 1, $noreg, 4, $noreg, %5.sub_32bit
+    %5:gr64_with_sub_8bit = nsw ADD64rr %5, %1, implicit-def dead $eflags
+    $rax = COPY %5
+    RET 0, killed $rax
+
+...
diff --git a/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll b/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
new file mode 100644
index 00000000000000..45a210ef391009
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O2 -o - \
+; RUN:   | FileCheck %s --implicit-check-not=TAILCALL
+; Generated with: clang -emit-llvm -O2 -S -fextend-lifetimes test.cpp -o -
+; =========== test.cpp ===============
+; extern int bar(int);
+; int foo1(int i)
+; {
+;     return bar(i);
+; }
+; =========== test.cpp ===============
+
+; CHECK: TAILCALL
+
+; ModuleID = 'test.cpp'
+source_filename = "test.cpp"
+
+define i32 @_Z4foo1i(i32 %i) local_unnamed_addr optdebug {
+entry:
+  %call = tail call i32 @_Z3bari(i32 %i)
+  tail call void (...) @llvm.fake.use(i32 %i)
+  ret i32 %call
+}
+
+declare i32 @_Z3bari(i32) local_unnamed_addr
diff --git a/llvm/test/CodeGen/X86/fake-use-suppress-load.ll b/llvm/test/CodeGen/X86/fake-use-suppress-load.ll
new file mode 100644
index 00000000000000..c1b442ebd79ffa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-suppress-load.ll
@@ -0,0 +1,14 @@
+; Suppress redundant loads feeding into fake uses.
+; RUN: llc -filetype=asm -o - %s --mtriple=x86_64-unknown-unknown | FileCheck %s
+; The Windows ABI works differently: there is no offset.
+;
+; Look for the spill
+; CHECK:      movq %r{{[a-z]+,}} -{{[0-9]+\(%rsp\)}}
+; CHECK-NOT:  movq -{{[0-9]+\(%rsp\)}}, %r{{[a-z]+}}
+
+define dso_local i32 @f(ptr %p) local_unnamed_addr optdebug {
+entry:
+  call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"() #1
+  notail call void (...) @llvm.fake.use(ptr %p)
+  ret i32 4
+}
diff --git a/llvm/test/CodeGen/X86/fake-use-tailcall.ll b/llvm/test/CodeGen/X86/fake-use-tailcall.ll
new file mode 100644
index 00000000000000..10bb22e1b564ab
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-tailcall.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -stop-after=finalize-isel - | FileCheck %s --implicit-check-not FAKE_USE
+; Fake uses following tail calls should be pulled in front
+; of the TCRETURN instruction. Fake uses of values defined by
+; the tail call, or appearing after it, should be suppressed.
+
+; CHECK: name:{{ +}}bar
+; CHECK: body:
+; CHECK: bb.0.{{.*}}:
+; CHECK: %0:{{.*}}= COPY
+; CHECK: FAKE_USE %0
+; CHECK: TCRETURN
+
+; CHECK: name:{{ +}}baz
+; CHECK: body:
+; CHECK: bb.0.{{.*}}:
+; CHECK: %0:{{.*}}= COPY
+; CHECK: FAKE_USE %0
+; CHECK: TCRETURN
+
+define void @bar(i32 %v) optdebug {
+entry:
+  %call = tail call i32 @_Z3fooi(i32 %v)
+  %mul = mul nsw i32 %call, 3
+  notail call void (...) @llvm.fake.use(i32 %mul)
+  notail call void (...) @llvm.fake.use(i32 %call)
+  notail call void (...) @llvm.fake.use(i32 %v)
+  ret void
+}
+
+define i32 @baz(i32 %v) optdebug {
+entry:
+  %call = tail call i32 @_Z3fooi(i32 %v)
+  notail call void (...) @llvm.fake.use(i32 %v)
+  ret i32 %call
+}
+
+declare i32 @_Z3fooi(i32) local_unnamed_addr
diff --git a/llvm/test/CodeGen/X86/fake-use-vector.ll b/llvm/test/CodeGen/X86/fake-use-vector.ll
new file mode 100644
index 00000000000000..cb46ccc8cac11c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-vector.ll
@@ -0,0 +1,39 @@
+; Check that a fake use of a 1-element vector does not trigger an assert in
+; the DAG legalizer.
+; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
+;
+; ModuleID = 't2.cpp'
+; source_filename = "t2.cpp"
+; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+;
+; Check that we get past ISel and generate FAKE_USE machine instructions for
+; one-element vectors.
+;
+; CHECK:       bb.0.entry:
+; CHECK-DAG:   %1:gr64 = COPY $rdi
+; CHECK-DAG:   %0:vr128 = COPY $xmm0
+; CHECK:       %2:vr64 =
+; CHECK-DAG:   FAKE_USE %1
+; CHECK-DAG:   FAKE_USE %0
+; CHECK:       RET
+
+
+target triple = "x86_64-unknown-unknown"
+
+; Function Attrs: nounwind sspstrong uwtable
+define <4 x float> @_Z3runDv4_fDv1_x(<4 x float> %r, i64 %b.coerce) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i64> undef, i64 %b.coerce, i32 0
+  %1 = bitcast i64 %b.coerce to <1 x i64>
+  %2 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %r, <1 x i64> %1)
+  tail call void (...) @llvm.fake.use(<1 x i64> %0)
+  tail call void (...) @llvm.fake.use(<4 x float> %r)
+  ret <4 x float> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+attributes #0 = { "target-cpu"="btver2" optdebug }
diff --git a/llvm/test/CodeGen/X86/fake-use-vector2.ll b/llvm/test/CodeGen/X86/fake-use-vector2.ll
new file mode 100644
index 00000000000000..6f2d3a5566dc67
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-vector2.ll
@@ -0,0 +1,27 @@
+; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
+;
+; Make sure we can split vectors that are used as operands of FAKE_USE.
+
+; Generated from:
+;
+; typedef long __attribute__((ext_vector_type(8))) long8;
+; void test0() { long8 id208 {0, 1, 2, 3, 4, 5, 6, 7}; }
+
+; ModuleID = 't5.cpp'
+source_filename = "t5.cpp"
+
+
+; CHECK:     %0:vr256 = VMOV
+; CHECK:     %1:vr256 = VMOV
+; CHECK-DAG: FAKE_USE killed %1
+; CHECK-DAG: FAKE_USE killed %0
+; CHECK:     RET
+define void @_Z5test0v() local_unnamed_addr #0 {
+entry:
+  tail call void (...) @llvm.fake.use(<8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>) #1
+  ret void
+}
+
+declare void @llvm.fake.use(...)
+
+attributes #0 = { "target-cpu"="btver2" optdebug }
diff --git a/llvm/test/CodeGen/X86/fake-use-zero-length.ll b/llvm/test/CodeGen/X86/fake-use-zero-length.ll
new file mode 100644
index 00000000000000..e8c6791b8edff2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-zero-length.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -stop-after=finalize-isel | FileCheck %s --implicit-check-not=FAKE_USE
+;
+; Make sure SelectionDAG does not crash handling fake uses of zero-length arrays
+; and structs. Check also that they are not propagated.
+;
+; Generated from the following source with
+; clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-after=safe-stack -o test.mir test.cpp
+;
+; int main ()
+; { int array[0]; }
+;
+;
+; CHECK: liveins: $[[IN_REG:[a-zA-Z0-9]+]]
+; CHECK: %[[IN_VREG:[a-zA-Z0-9]+]]:gr32 = COPY $[[IN_REG]]
+; CHECK: FAKE_USE %[[IN_VREG]]
+
+source_filename = "test.ll"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define hidden i32 @main([0 x i32] %zero, [1 x i32] %one) local_unnamed_addr optdebug {
+entry:
+  notail call void (...) @bar([0 x i32] %zero)
+  notail call void (...) @baz([1 x i32] %one)
+  notail call void (...) @llvm.fake.use([0 x i32] %zero)
+  notail call void (...) @llvm.fake.use([1 x i32] %one)
+  ret i32 0
+}
+
+declare void @bar([0 x i32] %a)
+declare void @baz([1 x i32] %a)
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 12c16a03b134c8..545640b7661691 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -211,6 +211,7 @@
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       X86 insert wait instruction
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
+; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       Machine Sanitizer Binary Metadata
diff --git a/llvm/test/DebugInfo/AArch64/fake-use-global-isel.ll b/llvm/test/DebugInfo/AArch64/fake-use-global-isel.ll
new file mode 100644
index 00000000000000..65a64583096959
--- /dev/null
+++ b/llvm/test/DebugInfo/AArch64/fake-use-global-isel.ll
@@ -0,0 +1,98 @@
+; REQUIRES: object-emission
+
+; Make sure the fake use of 'b' at the end of 'foo' causes location information for 'b'
+; to extend all the way to the end of the function.
+; Duplicates `DebugInfo/X86/fake-use.ll` for global-isel.
+
+; RUN: %llc_dwarf -O2 --global-isel=1 -mtriple=aarch64--linux-gnu -filetype=obj -dwarf-linkage-names=Abstract < %s | llvm-dwarfdump --debug-info --debug-line -v - -o %t
+; RUN: %python %p/../Inputs/check-fake-use.py %t
+; RUN: sed -e 's,call void (...) @llvm.fake.use,;,' %s \
+; RUN:   | %llc_dwarf - -O2 --global-isel=1 -mtriple=aarch64--linux-gnu -filetype=obj -dwarf-linkage-names=Abstract \
+; RUN:   | llvm-dwarfdump --debug-info --debug-line -v - -o %t
+; RUN: not %python %p/../Inputs/check-fake-use.py %t
+
+; Generated with:
+; clang -O2 -g -S -emit-llvm -fextend-this-ptr fake-use.c
+;
+; int glob[10];
+; extern void bar();
+;
+; int foo(int b, int i)
+; {
+;    int loc = glob[i] * 2;
+;    if (b) {
+;      glob[2] = loc;
+;      bar();
+;    }
+;    return loc;
+; }
+;
+; ModuleID = 't2.c'
+source_filename = "t2.c"
+
+@glob = common local_unnamed_addr global [10 x i32] zeroinitializer, align 16, !dbg !0
+
+; Function Attrs: nounwind sspstrong uwtable
+define i32 @foo(i32 %b, i32 %i) local_unnamed_addr optdebug !dbg !13 {
+entry:
+    #dbg_value(i32 %b, !17, !20, !21)
+  %c = add i32 %b, 42
+  %tobool = icmp sgt i32 %c, 2, !dbg !27
+  tail call void (...) @bar() #2, !dbg !32
+  %idxprom = sext i32 %i to i64, !dbg !22
+  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @glob, i64 0, i64 %idxprom, !dbg !22
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !23
+  %mul = shl nsw i32 %0, 1, !dbg !22
+  br i1 %tobool, label %if.end, label %if.then, !dbg !29
+
+if.then:                                          ; preds = %entry
+  store i32 %mul, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @glob, i64 0, i64 2), align 8, !dbg !30, !tbaa !23
+  tail call void (...) @bar() #2, !dbg !32
+  br label %if.end, !dbg !33
+
+if.end:                                           ; preds = %entry, %if.then
+  call void (...) @llvm.fake.use(i32 %b), !dbg !34
+  ret i32 %mul, !dbg !35
+}
+
+declare void @bar(...) local_unnamed_addr
+
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DIGlobalVariableExpression(var: !DIGlobalVariable(name: "glob", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true), expr: !DIExpression())
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
+!2 = !DIFile(filename: "t2.c", directory: "/")
+!3 = !{}
+!4 = !{!0}
+!5 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 320, align: 32, elements: !7)
+!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !{!8}
+!8 = !DISubrange(count: 10)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 4.0.0"}
+!13 = distinct !DISubprogram(name: "foo", scope: !2, file: !2, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !16)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!6, !6, !6}
+!16 = !{!17, !19}
+!17 = !DILocalVariable(name: "b", arg: 1, scope: !13, file: !2, line: 4, type: !6)
+!19 = !DILocalVariable(name: "loc", scope: !13, file: !2, line: 6, type: !6)
+!20 = !DIExpression()
+!21 = !DILocation(line: 4, scope: !13)
+!22 = !DILocation(line: 6, scope: !13)
+!23 = !{!24, !24, i64 0}
+!24 = !{!"int", !25, i64 0}
+!25 = !{!"omnipotent char", !26, i64 0}
+!26 = !{!"Simple C/C++ TBAA"}
+!27 = !DILocation(line: 7, scope: !28)
+!28 = distinct !DILexicalBlock(scope: !13, file: !2, line: 7)
+!29 = !DILocation(line: 7, scope: !13)
+!30 = !DILocation(line: 8, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !28, file: !2, line: 7)
+!32 = !DILocation(line: 9, scope: !31)
+!33 = !DILocation(line: 10, scope: !31)
+!34 = !DILocation(line: 12, scope: !13)
+!35 = !DILocation(line: 11, scope: !13)
diff --git a/llvm/test/DebugInfo/Inputs/check-fake-use.py b/llvm/test/DebugInfo/Inputs/check-fake-use.py
new file mode 100644
index 00000000000000..7797e102419b24
--- /dev/null
+++ b/llvm/test/DebugInfo/Inputs/check-fake-use.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python3
+
+# Parsing dwarfdump's output to determine whether the location list for the
+# parameter "b" covers all of the function. The script searches for information
+# in the input file to determine the [prologue, epilogue) range for the
+# function, the location list range for "b", and checks that the latter covers
+# the entirety of the former.
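+# Usage: check-fake-use.py <file containing llvm-dwarfdump --debug-info --debug-line -v output>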
+import re
+import sys
+
+DebugInfoPattern = r"\.debug_info contents:"
+DebugLinePattern = r"\.debug_line contents:"
+ProloguePattern = r"^\s*0x([0-9a-f]+)\s.+prologue_end"
+EpiloguePattern = r"^\s*0x([0-9a-f]+)\s.+epilogue_begin"
+FormalPattern = r"^0x[0-9a-f]+:\s+DW_TAG_formal_parameter"
+LocationPattern = r"DW_AT_location\s+\[DW_FORM_([a-z_]+)\](?:.*0x([a-f0-9]+))"
+DebugLocPattern = r'\[0x([a-f0-9]+),\s+0x([a-f0-9]+)\) ".text": (.+)$'
+
+SeenDebugInfo = False
+SeenDebugLine = False
+LocationRanges = None
+PrologueEnd = None
+EpilogueBegin = None
+
+# The dwarfdump output should contain the DW_AT_location for "b" first, then the
+# line table which should contain prologue_end and epilogue_begin entries.
+with open(sys.argv[1], "r") as dwarf_dump_file:
+    dwarf_iter = iter(dwarf_dump_file)
+    for line in dwarf_iter:
+        if not SeenDebugInfo and re.match(DebugInfoPattern, line):
+            SeenDebugInfo = True
+        if not SeenDebugLine and re.match(DebugLinePattern, line):
+            SeenDebugLine = True
+        # Get the range of DW_AT_location for "b".
+        if LocationRanges is None:
+            if match := re.match(FormalPattern, line):
+                # Go until we either find DW_AT_location or reach the end of this entry.
+                location_match = None
+                while location_match is None:
+                    if (line := next(dwarf_iter, "")) == "\n":
+                        raise RuntimeError(
+                            ".debug_info output is missing DW_AT_location for 'b'"
+                        )
+                    location_match = re.search(LocationPattern, line)
+                # Variable has whole-scope location, represented by an empty tuple.
+                if location_match.group(1) == "exprloc":
+                    LocationRanges = ()
+                    continue
+                if location_match.group(1) != "sec_offset":
+                    raise RuntimeError(
+                        f"Unhandled form for DW_AT_location: DW_FORM_{location_match.group(1)}"
+                    )
+                # Variable has location range list.
+                if (
+                    debug_loc_match := re.search(DebugLocPattern, next(dwarf_iter, ""))
+                ) is None:
+                    raise RuntimeError("Invalid location range list for 'b'")
+                LocationRanges = (
+                    int(debug_loc_match.group(1), 16),
+                    int(debug_loc_match.group(2), 16),
+                )
+                while (
+                    debug_loc_match := re.search(DebugLocPattern, next(dwarf_iter, ""))
+                ) is not None:
+                    match_loc_start = int(debug_loc_match.group(1), 16)
+                    match_loc_end = int(debug_loc_match.group(2), 16)
+                    match_expr = debug_loc_match.group(3)
+                    if match_loc_start != LocationRanges[1]:
+                        raise RuntimeError(
+                            f"Location list for 'b' is discontinuous from [0x{LocationRanges[1]:x}, 0x{match_loc_start:x})"
+                        )
+                    if "stack_value" in match_expr:
+                        raise RuntimeError(
+                            f"Location list for 'b' contains a stack_value expression: {match_expr}"
+                        )
+                    LocationRanges = (LocationRanges[0], match_loc_end)
+        # Get the prologue_end address.
+        elif PrologueEnd is None:
+            if match := re.match(ProloguePattern, line):
+                PrologueEnd = int(match.group(1), 16)
+        # Get the epilogue_begin address.
+        elif EpilogueBegin is None:
+            if match := re.match(EpiloguePattern, line):
+                EpilogueBegin = int(match.group(1), 16)
+                break
+
+if not SeenDebugInfo:
+    raise RuntimeError(".debug_info section not found.")
+if not SeenDebugLine:
+    raise RuntimeError(".debug_line section not found.")
+
+if LocationRanges is None:
+    raise RuntimeError(".debug_info output is missing parameter 'b'")
+if PrologueEnd is None:
+    raise RuntimeError(".debug_line output is missing prologue_end")
+if EpilogueBegin is None:
+    raise RuntimeError(".debug_line output is missing epilogue_begin")
+
+if len(LocationRanges) == 2 and (
+    LocationRanges[0] > PrologueEnd or LocationRanges[1] < EpilogueBegin
+):
+    raise RuntimeError(
+        f"""Location list for 'b' does not cover the whole function:")
+    Prologue to Epilogue = [0x{PrologueEnd:x}, 0x{EpilogueBegin:x})
+    Location range = [0x{LocationRanges[0]:x}, 0x{LocationRanges[1]:x})
+"""
+    )
diff --git a/llvm/test/DebugInfo/X86/fake-use.ll b/llvm/test/DebugInfo/X86/fake-use.ll
new file mode 100644
index 00000000000000..f44aadfeef5640
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/fake-use.ll
@@ -0,0 +1,96 @@
+; REQUIRES: object-emission
+
+; Make sure the fake use of 'b' at the end of 'foo' causes location information for 'b'
+; to extend all the way to the end of the function.
+
+; RUN: %llc_dwarf -O2 -filetype=obj -dwarf-linkage-names=Abstract < %s | llvm-dwarfdump --debug-info --debug-line -v - -o %t
+; RUN: %python %p/../Inputs/check-fake-use.py %t
+; RUN: sed -e 's,call void (...) @llvm.fake.use,;,' %s | %llc_dwarf - -O2 -filetype=obj -dwarf-linkage-names=Abstract | llvm-dwarfdump --debug-info --debug-line -v - -o %t
+; RUN: not %python %p/../Inputs/check-fake-use.py %t
+
+; Generated with:
+; clang -O2 -g -S -emit-llvm -fextend-this-ptr fake-use.c
+;
+; int glob[10];
+; extern void bar();
+;
+; int foo(int b, int i)
+; {
+;    int loc = glob[i] * 2;
+;    if (b) {
+;      glob[2] = loc;
+;      bar();
+;    }
+;    return loc;
+; }
+;
+; ModuleID = 't2.c'
+source_filename = "t2.c"
+
+@glob = common local_unnamed_addr global [10 x i32] zeroinitializer, align 16, !dbg !0
+
+; Function Attrs: nounwind sspstrong uwtable
+define i32 @foo(i32 %b, i32 %i) local_unnamed_addr optdebug !dbg !13 {
+entry:
+    #dbg_value(i32 %b, !17, !20, !21)
+  %c = add i32 %b, 42
+  %tobool = icmp sgt i32 %c, 2, !dbg !27
+  tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
+  tail call void (...) @bar() #2, !dbg !32
+  %idxprom = sext i32 %i to i64, !dbg !22
+  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @glob, i64 0, i64 %idxprom, !dbg !22
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !23
+  %mul = shl nsw i32 %0, 1, !dbg !22
+  br i1 %tobool, label %if.end, label %if.then, !dbg !29
+
+if.then:                                          ; preds = %entry
+  store i32 %mul, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @glob, i64 0, i64 2), align 8, !dbg !30, !tbaa !23
+  tail call void (...) @bar() #2, !dbg !32
+  br label %if.end, !dbg !33
+
+if.end:                                           ; preds = %entry, %if.then
+  call void (...) @llvm.fake.use(i32 %b), !dbg !34
+  ret i32 %mul, !dbg !35
+}
+
+declare void @bar(...) local_unnamed_addr
+
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DIGlobalVariableExpression(var: !DIGlobalVariable(name: "glob", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true), expr: !DIExpression())
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
+!2 = !DIFile(filename: "t2.c", directory: "/")
+!3 = !{}
+!4 = !{!0}
+!5 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 320, align: 32, elements: !7)
+!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !{!8}
+!8 = !DISubrange(count: 10)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 4.0.0"}
+!13 = distinct !DISubprogram(name: "foo", scope: !2, file: !2, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !16)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!6, !6, !6}
+!16 = !{!17, !19}
+!17 = !DILocalVariable(name: "b", arg: 1, scope: !13, file: !2, line: 4, type: !6)
+!19 = !DILocalVariable(name: "loc", scope: !13, file: !2, line: 6, type: !6)
+!20 = !DIExpression()
+!21 = !DILocation(line: 4, scope: !13)
+!22 = !DILocation(line: 6, scope: !13)
+!23 = !{!24, !24, i64 0}
+!24 = !{!"int", !25, i64 0}
+!25 = !{!"omnipotent char", !26, i64 0}
+!26 = !{!"Simple C/C++ TBAA"}
+!27 = !DILocation(line: 7, scope: !28)
+!28 = distinct !DILexicalBlock(scope: !13, file: !2, line: 7)
+!29 = !DILocation(line: 7, scope: !13)
+!30 = !DILocation(line: 8, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !28, file: !2, line: 7)
+!32 = !DILocation(line: 9, scope: !31)
+!33 = !DILocation(line: 10, scope: !31)
+!34 = !DILocation(line: 12, scope: !13)
+!35 = !DILocation(line: 11, scope: !13)
diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td
index b52849b6bc931d..00601b7ae6e0d2 100644
--- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td
+++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td
@@ -136,14 +136,14 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
 // CHECK:      const uint8_t *GenMyCombiner::getMatchTable() const {
 // CHECK-NEXT:   constexpr static uint8_t MatchTable0[] = {
 // CHECK-NEXT:     GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2([[#LOWER:]]), GIMT_Encode2([[#UPPER:]]), /*)*//*default:*//*Label 6*/ GIMT_Encode4([[#DEFAULT:]]),
-// CHECK-NEXT:     /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(470), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
-// CHECK-NEXT:     /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(506), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
-// CHECK-NEXT:     /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(553), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
-// CHECK-NEXT:     /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(587), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
-// CHECK-NEXT:     /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(610), GIMT_Encode4(0),
-// CHECK-NEXT:     /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(622),
+// CHECK-NEXT:     /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(474), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
+// CHECK-NEXT:     /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(510), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
+// CHECK-NEXT:     /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(557), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
+// CHECK-NEXT:     /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(591), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
+// CHECK-NEXT:     /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(614), GIMT_Encode4(0),
+// CHECK-NEXT:     /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(626),
 // CHECK-NEXT:     // Label 0: @[[#%u, mul(UPPER-LOWER, 4) + 10]]
-// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(494), // Rule ID 4 //
+// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(498), // Rule ID 4 //
 // CHECK-NEXT:       GIM_CheckFeatures, GIMT_Encode2(GIFBS_HasAnswerToEverything),
 // CHECK-NEXT:       GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled),
 // CHECK-NEXT:       // MIs[0] a
@@ -156,8 +156,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
 // CHECK-NEXT:       GIM_CheckIsSafeToFold, /*NumInsns*/1,
 // CHECK-NEXT:       // Combiner Rule #3: InstTest1
 // CHECK-NEXT:       GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner2),
-// CHECK-NEXT:     // Label 7: @494
-// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(505), // Rule ID 3 //
+// CHECK-NEXT:     // Label 7: @498
+// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(509), // Rule ID 3 //
 // CHECK-NEXT:       GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled),
 // CHECK-NEXT:       // MIs[0] a
 // CHECK-NEXT:       // No operand predicates
@@ -165,10 +165,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
 // CHECK-NEXT:       // No operand predicates
 // CHECK-NEXT:       // Combiner Rule #2: InstTest0
 // CHECK-NEXT:       GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner1),
-// CHECK-NEXT:     // Label 8: @505
+// CHECK-NEXT:     // Label 8: @509
 // CHECK-NEXT:     GIM_Reject,
-// CHECK-NEXT:     // Label 1: @506
-// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(552), // Rule ID 6 //
+// CHECK-NEXT:     // Label 1: @510
+// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(556), // Rule ID 6 //
 // CHECK-NEXT:       GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule5Enabled),
 // CHECK-NEXT:       GIM_RootCheckType, /*Op*/2, /*Type*/GILLT_s32,
 // CHECK-NEXT:       // MIs[0] dst
@@ -185,10 +185,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
 // CHECK-NEXT:       GIR_RootToRootCopy, /*OpIdx*/0, // dst
 // CHECK-NEXT:       GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/1, /*OpIdx*/1, // z
 // CHECK-NEXT:       GIR_EraseRootFromParent_Done,
-// CHECK-NEXT:     // Label 9: @552
+// CHECK-NEXT:     // Label 9: @556
 // CHECK-NEXT:     GIM_Reject,
-// CHECK-NEXT:     // Label 2: @553
-// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(586), // Rule ID 5 //
+// CHECK-NEXT:     // Label 2: @557
+// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(590), // Rule ID 5 //
 // CHECK-NEXT:       GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled),
 // CHECK-NEXT:       // MIs[0] tmp
 // CHECK-NEXT:       GIM_RecordInsnIgnoreCopies, /*DefineMI*/1, /*MI*/0, /*OpIdx*/0, // MIs[1]
@@ -204,29 +204,29 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
 // CHECK-NEXT:       GIR_RootToRootCopy, /*OpIdx*/1, // ptr
 // CHECK-NEXT:       GIR_MergeMemOperands, /*InsnID*/0, /*NumInsns*/2, /*MergeInsnID's*/0, 1,
 // CHECK-NEXT:       GIR_EraseRootFromParent_Done,
-// CHECK-NEXT:     // Label 10: @586
+// CHECK-NEXT:     // Label 10: @590
 // CHECK-NEXT:     GIM_Reject,
-// CHECK-NEXT:     // Label 3: @587
-// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(598), // Rule ID 0 //
+// CHECK-NEXT:     // Label 3: @591
+// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(602), // Rule ID 0 //
 // CHECK-NEXT:       GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled),
 // CHECK-NEXT:       // Combiner Rule #0: WipOpcodeTest0; wip_match_opcode 'G_TRUNC'
 // CHECK-NEXT:       GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0),
-// CHECK-NEXT:     // Label 11: @598
-// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(609), // Rule ID 1 //
+// CHECK-NEXT:     // Label 11: @602
+// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(613), // Rule ID 1 //
 // CHECK-NEXT:       GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled),
 // CHECK-NEXT:       // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_TRUNC'
 // CHECK-NEXT:       GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0),
-// CHECK-NEXT:     // Label 12: @609
+// CHECK-NEXT:     // Label 12: @613
 // CHECK-NEXT:     GIM_Reject,
-// CHECK-NEXT:     // Label 4: @610
-// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(621), // Rule ID 2 //
+// CHECK-NEXT:     // Label 4: @614
+// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(625), // Rule ID 2 //
 // CHECK-NEXT:       GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled),
 // CHECK-NEXT:       // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_SEXT'
 // CHECK-NEXT:       GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0),
-// CHECK-NEXT:     // Label 13: @621
+// CHECK-NEXT:     // Label 13: @625
 // CHECK-NEXT:     GIM_Reject,
-// CHECK-NEXT:     // Label 5: @622
-// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(656), // Rule ID 7 //
+// CHECK-NEXT:     // Label 5: @626
+// CHECK-NEXT:     GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(660), // Rule ID 7 //
 // CHECK-NEXT:       GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule6Enabled),
 // CHECK-NEXT:       // MIs[0] dst
 // CHECK-NEXT:       // No operand predicates
@@ -240,7 +240,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
 // CHECK-NEXT:       GIR_RootToRootCopy, /*OpIdx*/0, // dst
 // CHECK-NEXT:       GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0,
 // CHECK-NEXT:       GIR_EraseRootFromParent_Done,
-// CHECK-NEXT:     // Label 14: @656
+// CHECK-NEXT:     // Label 14: @660
 // CHECK-NEXT:     GIM_Reject,
 // CHECK-NEXT:     // Label 6: @[[#%u, DEFAULT]]
 // CHECK-NEXT:     GIM_Reject,
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-phi.ll
new file mode 100644
index 00000000000000..064d3f29dd9ebb
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-phi.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -passes='require<profile-summary>,function(codegenprepare)' -S -mtriple=x86_64 | FileCheck %s --implicit-check-not="llvm.fake.use"
+;
+; When performing return duplication to enable tail call optimization, we
+; clone fake uses that exist in the to-be-eliminated return block into the
+; predecessor blocks. Fake uses of PHI nodes cannot simply be copied: each
+; clone must take the PHI's incoming operand for its predecessor. We are
+; currently not able to do this correctly, so we suppress the cloning of such
+; fake uses for now.
+;
+; There should be no fake use of a call result in any of the resulting return
+; blocks.
+
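+; For illustration only (a hypothetical shape, not this test's exact IR):
+; given a return block such as
+;
+;   if.end:
+;     %p = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
+;     notail call void (...) @llvm.fake.use(i32 %p)
+;     ret void
+;
+; a return duplicated into %bb1 would need @llvm.fake.use(i32 %a) and one
+; duplicated into %bb2 would need @llvm.fake.use(i32 %b), whereas a fake use
+; of a non-PHI value such as %this can be cloned verbatim.
+;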
+; Fake uses of `this` should be duplicated into both return blocks.
+; CHECK: if.then:
+; CHECK: @llvm.fake.use({{.*}}this
+; CHECK: if.else:
+; CHECK: @llvm.fake.use({{.*}}this
+
+; CHECK: declare void @llvm.fake.use
+
+source_filename = "test.ll"
+
+%class.a = type { i8 }
+
+declare i32 @foo(ptr nonnull dereferenceable(1)) local_unnamed_addr
+declare i32 @bar(ptr nonnull dereferenceable(1)) local_unnamed_addr
+
+define hidden void @func(ptr nonnull dereferenceable(1) %this) local_unnamed_addr align 2 optdebug {
+entry:
+  %b = getelementptr inbounds %class.a, ptr %this, i64 0, i32 0
+  %0 = load i8, ptr %b, align 1
+  %tobool.not = icmp eq i8 %0, 0
+  br i1 %tobool.not, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call i32 @foo(ptr nonnull dereferenceable(1) %this)
+  %call2 = tail call i32 @bar(ptr nonnull dereferenceable(1) %this)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %call4 = tail call i32 @bar(ptr nonnull dereferenceable(1) %this)
+  %call5 = tail call i32 @foo(ptr nonnull dereferenceable(1) %this)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %call4.sink = phi i32 [ %call4, %if.else ], [ %call, %if.then ]
+  notail call void (...) @llvm.fake.use(i32 %call4.sink)
+  notail call void (...) @llvm.fake.use(ptr nonnull %this)
+  ret void
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-split-ret.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-split-ret.ll
new file mode 100644
index 00000000000000..b2cf89f6f2dd82
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-split-ret.ll
@@ -0,0 +1,37 @@
+; RUN: opt -mtriple=x86_64-unknown-unknown -S -passes='require<profile-summary>,function(codegenprepare)' < %s -o - | FileCheck %s
+;
+; Ensure return instruction splitting ignores fake uses.
+;
+; IR generated with clang -O2 -S -emit-llvm -fextend-lifetimes test.cpp
+;
+;// test.cpp
+;extern int bar(int);
+;
+;int foo2(int i)
+;{
+;  --i;
+;  if (i <= 0)
+;    return -1;
+;  return bar(i);
+;}
+
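+; A sketch of the expected outcome (assumed; only the placement of the rets
+; is pinned down by the CHECK lines below): the return is duplicated into
+; %if.end to enable the tail call, while %cleanup keeps the constant return
+; for the %entry path:
+;
+;   if.end:
+;     %call = tail call i32 @_Z3bari(i32 %dec)
+;     ret i32 %call
+;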
+declare i32 @_Z3bari(i32) local_unnamed_addr
+
+define i32 @_Z4foo2i(i32 %i) local_unnamed_addr optdebug {
+entry:
+  %dec = add nsw i32 %i, -1
+  %cmp = icmp slt i32 %i, 2
+  br i1 %cmp, label %cleanup, label %if.end
+
+if.end:                                           ; preds = %entry
+  %call = tail call i32 @_Z3bari(i32 %dec)
+; CHECK: ret i32 %call
+  br label %cleanup
+
+cleanup:                                          ; preds = %entry, %if.end
+; CHECK: cleanup:
+  %retval.0 = phi i32 [ %call, %if.end ], [ -1, %entry ]
+  tail call void (...) @llvm.fake.use(i32 %dec)
+; CHECK: ret i32 -1
+  ret i32 %retval.0
+}
diff --git a/llvm/test/Transforms/GVN/fake-use-constprop.ll b/llvm/test/Transforms/GVN/fake-use-constprop.ll
new file mode 100644
index 00000000000000..1466f9f9fca277
--- /dev/null
+++ b/llvm/test/Transforms/GVN/fake-use-constprop.ll
@@ -0,0 +1,60 @@
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+;
+; The Global Value Numbering pass (GVN) propagates values that branch
+; conditions prove to be constant into all uses within the dominated basic
+; blocks. However, we don't want such constants propagated into fake.use
+; intrinsics, since that would render the intrinsic useless with respect to
+; keeping the variable's original value live up until the fake.use.
+; This test checks that we don't generate any fake.uses with constant 0.
+;
+; Reduced from the following test case, generated with clang -O2 -S -emit-llvm -fextend-lifetimes test.c
+;
+; extern void func1();
+; extern int bar();
+; extern void baz(int);
+;
+; int foo(int i, float f, int *punused)
+; {
+;   int j = 3*i;
+;   if (j > 0) {
+;     int m = bar(i);
+;     if (m) {
+;       char b = f;
+;       baz(b);
+;       if (b)
+;         goto lab;
+;       func1();
+;     }
+; lab:
+;     func1();
+;   }
+;   return 1;
+; }
+
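+;; Note that %conv is provably zero in %if.end: the branch on
+;; "%tobool3 = icmp eq i8 %conv, 0" reaches %if.end only when %conv == 0,
+;; which is what lets GVN fold the @bees operand to a constant.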
+;; GVN should propagate a constant value through to a regular call, but not to
+;; a fake use, which should continue to track the original value.
+; CHECK: %[[CONV_VAR:[a-zA-Z0-9]+]] = fptosi
+; CHECK: call {{.+}} @bees(i8 0)
+; CHECK: call {{.+}} @llvm.fake.use(i8 %[[CONV_VAR]])
+
+define i32 @foo(float %f) optdebug {
+  %conv = fptosi float %f to i8
+  %tobool3 = icmp eq i8 %conv, 0
+  br i1 %tobool3, label %if.end, label %lab
+
+if.end:
+  tail call void (...) @bees(i8 %conv)
+  tail call void (...) @llvm.fake.use(i8 %conv)
+  br label %lab
+
+lab:
+  ret i32 1
+}
+
+declare i32 @bar(...)
+
+declare void @baz(i32)
+
+declare void @bees(i32)
+
+declare void @func1(...)
diff --git a/llvm/test/Transforms/SROA/fake-use-escape.ll b/llvm/test/Transforms/SROA/fake-use-escape.ll
new file mode 100644
index 00000000000000..5429d09740e522
--- /dev/null
+++ b/llvm/test/Transforms/SROA/fake-use-escape.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -passes=sroa %s | FileCheck %s
+;
+;; Check that we do not assert and that we retain the fake use intrinsic call
+;; that uses the address of %bar, i.e. %bar.addr.
+;
+; CHECK: define{{.*}}foo
+; CHECK: call{{.*llvm\.fake\.use.*}}(ptr %bar.addr)
+
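+; %baz itself is a promotable alloca, so SROA rewrites its load (%1) to the
+; stored value %bar.addr; the fake use is then expected to take
+; "ptr %bar.addr" directly (as the CHECK line above encodes) rather than
+; being dropped, since %bar.addr has escaped and cannot be promoted.
+;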
+define void @_Z3fooPi(ptr %bar) {
+entry:
+  %bar.addr = alloca ptr, align 8
+  %baz = alloca ptr, align 8
+  store ptr %bar, ptr %bar.addr, align 8
+  store ptr %bar.addr, ptr %baz, align 8
+  %0 = load ptr, ptr %bar.addr, align 8
+  %1 = load ptr, ptr %baz, align 8
+  call void (...) @llvm.fake.use(ptr %1)
+  ret void
+}
+
+declare void @llvm.fake.use(...)
diff --git a/llvm/test/Transforms/SROA/fake-use-sroa.ll b/llvm/test/Transforms/SROA/fake-use-sroa.ll
new file mode 100644
index 00000000000000..9e92df15487506
--- /dev/null
+++ b/llvm/test/Transforms/SROA/fake-use-sroa.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -passes=sroa %s | FileCheck %s
+; With fake use intrinsics generated for small aggregates, check that when
+; SROA slices the aggregate, we generate individual fake use intrinsics for
+; the individual values.
+
+; Generated from the following source:
+; struct s {
+;   int i;
+;   int j;
+; };
+;
+; void foo(struct s S) {
+; }
+;
+; void bar() {
+;   int arr[2] = {5, 6};
+; }
+;
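+; For the struct parameter below, SROA is expected to split the i64-coerced
+; value into two i32 slices, each kept alive by its own fake use; a sketch of
+; the expected shape (the upper half presumably arrives via a preceding lshr):
+;
+;   %lo = trunc i64 %S.coerce to i32
+;   call void (...) @llvm.fake.use(i32 %lo)
+;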
+%struct.s = type { i32, i32 }
+@__const.bar.arr = private unnamed_addr constant [2 x i32] [i32 5, i32 6], align 4
+
+; A small struct passed as parameter
+; CHECK-LABEL: define{{.*}}foo
+; CHECK:       %[[SLICE1:[^ ]+]] = trunc i64
+; CHECK:       %[[SLICE2:[^ ]+]] = trunc i64
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[SLICE1]])
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[SLICE2]])
+define dso_local void @foo(i64 %S.coerce) optdebug {
+entry:
+  %S = alloca %struct.s, align 4
+  store i64 %S.coerce, ptr %S, align 4
+  %fake.use = load %struct.s, ptr %S, align 4
+  notail call void (...) @llvm.fake.use(%struct.s %fake.use)
+  ret void
+}
+
+; A local variable with a small array type.
+; CHECK-LABEL: define{{.*}}bar
+; CHECK:       %[[ARRAYSLICE1:[^ ]+]] = load
+; CHECK:       %[[ARRAYSLICE2:[^ ]+]] = load
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[ARRAYSLICE1]])
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[ARRAYSLICE2]])
+define dso_local void @bar() optdebug {
+entry:
+  %arr = alloca [2 x i32], align 4
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %arr, ptr align 4 @__const.bar.arr, i64 8, i1 false)
+  %fake.use = load [2 x i32], ptr %arr, align 4
+  notail call void (...) @llvm.fake.use([2 x i32] %fake.use)
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
index bbd1f9af65095a..3a4c7ea03b3aa9 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -201,6 +201,7 @@ static_library("CodeGen") {
     "RegisterPressure.cpp",
     "RegisterScavenging.cpp",
     "RegisterUsageInfo.cpp",
+    "RemoveLoadsIntoFakeUses.cpp",
     "RemoveRedundantDebugValues.cpp",
     "RenameIndependentSubregs.cpp",
     "ReplaceWithVeclib.cpp",


