[llvm] [ExtendLifetimes] Implement llvm.fake.use to extend variable lifetimes (PR #86149)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 21 09:37:21 PDT 2024


https://github.com/SLTozer created https://github.com/llvm/llvm-project/pull/86149

This patch is part of a series that adds an `-fextend-lifetimes` flag to clang, which extends the lifetimes of local variables and parameters for improved debuggability. In addition to that flag, the series adds a pragma to selectively disable `-fextend-lifetimes`, and an `-fextend-this-ptr` flag that behaves like `-fextend-lifetimes` for `this` pointers only. All changes and tests in these patches were written by @wolfy1961, though I will be managing these reviews and addressing any comments raised. The extend-lifetimes behavior is intended eventually to be enabled by `-Og`, as discussed in the RFC [here](https://discourse.llvm.org/t/rfc-redefine-og-o1-and-add-a-new-level-of-og/72850).

---

This patch implements a new intrinsic instruction in LLVM, `llvm.fake.use` in IR and `FAKE_USE` in MIR, that takes a single operand and has no effect other than "using" its operand, ensuring that the operand remains live until after the fake use. This patch does not emit fake uses anywhere; the next patch in the sequence has the clang frontend emit them, inserting for each variable (or `this`) a call to `llvm.fake.use` at the end of that variable's scope, with the variable's value as its operand. This patch covers everything post-frontend, which is largely the basic plumbing for a new intrinsic/instruction, along with a few steps to preserve fake uses through optimizations (such as moving them ahead of a tail call or translating them through SROA).
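As a brief illustration of the IR form (a minimal sketch; the function and value names are hypothetical):

```llvm
declare void @llvm.fake.use(...)

define void @foo(i32 %x) {
entry:
  %sum = add i32 %x, 1
  ; Keeps %sum live to the end of its scope for debugging, even though
  ; it has no other users; no code is generated for this call.
  notail call void (...) @llvm.fake.use(i32 %sum)
  ret void
}
```

After instruction selection this becomes a `FAKE_USE` machine instruction, which the AsmPrinter skips when emitting code.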

From c6d6ccd97116d514c99aa50928131dab8340c8be Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Thu, 21 Mar 2024 16:21:52 +0000
Subject: [PATCH] [ExtendLifetimes] Implement llvm.fake.use to extend variable
 lifetimes

---
 llvm/docs/LangRef.rst                         |  36 ++++++
 llvm/include/llvm/Analysis/PtrUseVisitor.h    |   1 +
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |   5 +
 llvm/include/llvm/CodeGen/MachineInstr.h      |   2 +
 llvm/include/llvm/CodeGen/SelectionDAGISel.h  |   1 +
 llvm/include/llvm/IR/Intrinsics.td            |   3 +
 llvm/include/llvm/Support/TargetOpcodes.def   |   3 +
 llvm/include/llvm/Target/Target.td            |   8 ++
 llvm/lib/CodeGen/Analysis.cpp                 |   3 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  43 +++++++
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  42 ++++++-
 .../CodeGen/DeadMachineInstructionElim.cpp    |   3 +-
 llvm/lib/CodeGen/MachineCSE.cpp               |   2 +-
 llvm/lib/CodeGen/MachineScheduler.cpp         |   3 +-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp    |   3 +
 .../SelectionDAG/LegalizeFloatTypes.cpp       |  20 +++
 .../SelectionDAG/LegalizeIntegerTypes.cpp     |  19 +++
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   7 ++
 .../SelectionDAG/LegalizeTypesGeneric.cpp     |  11 ++
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  37 +++++-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  18 +++
 .../SelectionDAG/SelectionDAGDumper.cpp       |   2 +
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp |  64 ++++++++++
 llvm/lib/IR/Instruction.cpp                   |   5 +-
 llvm/lib/IR/Verifier.cpp                      |   1 +
 llvm/lib/Target/X86/X86FloatingPoint.cpp      |  32 +++++
 llvm/lib/Transforms/Scalar/SROA.cpp           |  30 +++++
 llvm/lib/Transforms/Utils/CloneFunction.cpp   |   6 +
 llvm/lib/Transforms/Utils/Local.cpp           |   3 +
 .../Utils/PromoteMemoryToRegister.cpp         |   3 +-
 llvm/test/CodeGen/MIR/X86/fake-use-phi.mir    |  95 +++++++++++++++
 .../CodeGen/MIR/X86/fake-use-scheduler.mir    | 115 ++++++++++++++++++
 .../CodeGen/MIR/X86/fake-use-tailcall.mir     | 106 ++++++++++++++++
 .../CodeGen/MIR/X86/fake-use-zero-length.ll   |  40 ++++++
 llvm/test/CodeGen/X86/fake-use-hpfloat.ll     |  17 +++
 llvm/test/CodeGen/X86/fake-use-ld.ll          |  51 ++++++++
 .../CodeGen/X86/fake-use-simple-tail-call.ll  |  33 +++++
 llvm/test/CodeGen/X86/fake-use-split-ret.ll   |  53 ++++++++
 llvm/test/CodeGen/X86/fake-use-sroa.ll        |  54 ++++++++
 .../CodeGen/X86/fake-use-suppress-load.ll     |  23 ++++
 llvm/test/CodeGen/X86/fake-use-tailcall.ll    |  23 ++++
 llvm/test/CodeGen/X86/fake-use-vector.ll      |  45 +++++++
 llvm/test/CodeGen/X86/fake-use-vector2.ll     |  33 +++++
 .../DebugInfo/X86/Inputs/check-fake-use.py    | 100 +++++++++++++++
 llvm/test/DebugInfo/X86/fake-use.ll           |  98 +++++++++++++++
 .../test/Transforms/GVN/fake-use-constprop.ll |  69 +++++++++++
 46 files changed, 1362 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/CodeGen/MIR/X86/fake-use-phi.mir
 create mode 100644 llvm/test/CodeGen/MIR/X86/fake-use-scheduler.mir
 create mode 100644 llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
 create mode 100644 llvm/test/CodeGen/MIR/X86/fake-use-zero-length.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-hpfloat.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-ld.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-split-ret.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-sroa.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-suppress-load.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-tailcall.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-vector.ll
 create mode 100644 llvm/test/CodeGen/X86/fake-use-vector2.ll
 create mode 100644 llvm/test/DebugInfo/X86/Inputs/check-fake-use.py
 create mode 100644 llvm/test/DebugInfo/X86/fake-use.ll
 create mode 100644 llvm/test/Transforms/GVN/fake-use-constprop.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8bc1cab01bf0a6..e7b8af3a2116c7 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -27896,6 +27896,42 @@ execution, but is unknown at compile time.
 If the result value does not fit in the result type, then the result is
 a :ref:`poison value <poisonvalues>`.
 
+.. _llvm_fake_use:
+
+'``llvm.fake.use``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.fake.use(...)
+
+Overview:
+"""""""""
+
+The ``llvm.fake.use`` intrinsic is a no-op. It takes a single
+value as an operand and is treated as a use of that operand, forcing the
+optimizer to preserve the value up to the point of the fake use. This is
+used to extend the lifetimes of variables: placing this intrinsic at the
+end of a variable's scope helps prevent the variable from being optimized out.
+
+Arguments:
+""""""""""
+
+The ``llvm.fake.use`` intrinsic takes one argument, which may be any
+function-local SSA value. Note that the signature is variadic so that the
+intrinsic can take any type of argument, but passing more than one argument will
+result in an error.
+
+Semantics:
+""""""""""
+
+This intrinsic does nothing, but optimizers must consider it a use of its single
+operand and should try to preserve the intrinsic and its position in the
+function.
+
 
 Stack Map Intrinsics
 --------------------
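Because the declaration is variadic, one declaration serves operands of any type. A minimal sketch of typical calls (types chosen arbitrarily for illustration):

```llvm
declare void @llvm.fake.use(...)

define void @demo(i32 %i, ptr %p, <4 x float> %v) {
entry:
  ; Each call passes exactly one operand; per the Arguments section
  ; above, passing more than one operand is an error.
  notail call void (...) @llvm.fake.use(i32 %i)
  notail call void (...) @llvm.fake.use(ptr %p)
  notail call void (...) @llvm.fake.use(<4 x float> %v)
  ret void
}
```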
diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h
index 86206b2d5e9f88..4e46d441fce17e 100644
--- a/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -279,6 +279,7 @@ class PtrUseVisitor : protected InstVisitor<DerivedT>,
     default:
       return Base::visitIntrinsicInst(II);
 
+    case Intrinsic::fake_use:
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
       return; // No-op intrinsics.
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 49d51a27e3c0f6..43aa02b5b03762 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1304,6 +1304,11 @@ enum NodeType {
   LIFETIME_START,
   LIFETIME_END,
 
+  /// FAKE_USE represents a use of the operand but generates no code.
+  /// Its purpose is to extend the operand's lifetime, mainly for
+  /// debugging purposes.
+  FAKE_USE,
+
   /// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the
   /// beginning and end of GC transition  sequence, and carry arbitrary
   /// information that target might need for lowering.  The first operand is
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index fcdd73d8b65fdd..163bcf6aa5a1a2 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1401,6 +1401,8 @@ class MachineInstr
     return getOpcode() == TargetOpcode::EXTRACT_SUBREG;
   }
 
+  bool isFakeUse() const { return getOpcode() == TargetOpcode::FAKE_USE; }
+
   /// Return true if the instruction behaves like a copy.
   /// This does not include native copy instructions.
   bool isCopyLike() const {
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 837f8bf7263ea9..cb8dec554a57b6 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -453,6 +453,7 @@ class SelectionDAGISel : public MachineFunctionPass {
   void Select_READ_REGISTER(SDNode *Op);
   void Select_WRITE_REGISTER(SDNode *Op);
   void Select_UNDEF(SDNode *N);
+  void Select_FAKE_USE(SDNode *N);
   void CannotYetSelect(SDNode *N);
 
   void Select_FREEZE(SDNode *N);
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 091f9b38107989..23a9f586056267 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1778,6 +1778,9 @@ def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
                                 [IntrNoMem, IntrWillReturn, IntrConvergent],
                                 "llvm.is.constant">;
 
+// Introduce a use of the argument without generating any code.
+def int_fake_use : Intrinsic<[], [llvm_vararg_ty]>;
+
 // Intrinsic to mask out bits of a pointer.
 // First argument must be pointer or vector of pointer. This is checked by the
 // verifier.
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 899eaad5842ae0..9797bbe02a9b5c 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -217,6 +217,9 @@ HANDLE_TARGET_OPCODE(PATCHABLE_TYPED_EVENT_CALL)
 
 HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
 
+/// Represents a use of the operand but generates no code.
+HANDLE_TARGET_OPCODE(FAKE_USE)
+
 // This is a fence with the singlethread scope. It represents a compiler memory
 // barrier, but does not correspond to any generated instruction.
 HANDLE_TARGET_OPCODE(MEMBARRIER)
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index cb1c0ed2513d45..2f5d9a5b8ea877 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1401,6 +1401,14 @@ def FAULTING_OP : StandardPseudoInstruction {
   let isTerminator = true;
   let isBranch = true;
 }
+def FAKE_USE : StandardPseudoInstruction {
+  // An instruction that uses its operands but does nothing.
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
+  let AsmString = "FAKE_USE";
+  let hasSideEffects = 0;
+  let isMeta = true;
+}
 def PATCHABLE_OP : StandardPseudoInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index af7643d93591f7..05840c963ea526 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -566,7 +566,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
       if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
           II->getIntrinsicID() == Intrinsic::assume ||
-          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
+          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl ||
+          II->getIntrinsicID() == Intrinsic::fake_use)
         continue;
     if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
         !isSafeToSpeculativelyExecute(&*BBI))
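To illustrate the change above (a hedged sketch with hypothetical names): a call followed only by fake uses before the return is still considered to be in tail-call position, since the scan from the call to the return now skips `llvm.fake.use` alongside `lifetime.end`, `assume`, and `noalias.scope.decl`:

```llvm
declare i32 @callee(i32)
declare void @llvm.fake.use(...)

define i32 @caller(i32 %x) {
entry:
  %r = tail call i32 @callee(i32 %x)
  ; Without the change above, this intervening call would disqualify
  ; @callee from tail-call position.
  notail call void (...) @llvm.fake.use(i32 %x)
  ret i32 %r
}
```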
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index a15538755d73b3..1e680630e6cb58 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1099,11 +1099,46 @@ void AsmPrinter::emitFunctionEntryLabel() {
   }
 }
 
+// Recognize cases where a spilled register is reloaded solely to feed into a
+// FAKE_USE.
+static bool isLoadFeedingIntoFakeUse(const MachineInstr &MI) {
+  const MachineFunction *MF = MI.getMF();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+  // If the restore size is std::nullopt then we are not dealing with a reload
+  // of a spilled register.
+  if (!MI.getRestoreSize(TII))
+    return false;
+
+  // Check whether this register is the operand of a FAKE_USE and
+  // whether it has the kill flag set there.
+  auto NextI = std::next(MI.getIterator());
+  if (NextI == MI.getParent()->end() || !NextI->isFakeUse())
+    return false;
+
+  unsigned Reg = MI.getOperand(0).getReg();
+  for (const MachineOperand &MO : NextI->operands()) {
+    // Return true if we came across the register from the
+    // previous spill instruction that is killed in NextI.
+    if (MO.isReg() && MO.isUse() && MO.isKill() && MO.getReg() == Reg)
+      return true;
+  }
+
+  return false;
+}
+
 /// emitComments - Pretty-print comments for instructions.
 static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
   const MachineFunction *MF = MI.getMF();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
+  // If this is a reload of a spilled register that only feeds into a FAKE_USE
+  // instruction, the loaded value has no effect on the program and has only
+  // been kept alive for debugging; since the value is still available on the
+  // stack, we can skip emitting the load itself.
+  if (isLoadFeedingIntoFakeUse(MI))
+    return;
+
   // Check for spills and reloads
 
   // We assume a single instruction only has a spill or reload, not
@@ -1828,6 +1863,8 @@ void AsmPrinter::emitFunctionBody() {
       case TargetOpcode::KILL:
         if (isVerbose()) emitKill(&MI, *this);
         break;
+      case TargetOpcode::FAKE_USE:
+        break;
       case TargetOpcode::PSEUDO_PROBE:
         emitPseudoProbe(MI);
         break;
@@ -1843,6 +1880,12 @@ void AsmPrinter::emitFunctionBody() {
         // purely meta information.
         break;
       default:
+        // If this is a reload of a spilled register that only feeds into a
+        // FAKE_USE instruction, the loaded value has no effect on the
+        // program and has only been kept alive for debugging; since it is
+        // still available on the stack, we can skip emitting the load.
+        if (isLoadFeedingIntoFakeUse(MI))
+          break;
         emitInstruction(&MI);
         if (CanDoExtraAnalysis) {
           MCInst MCI;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9f99bb7e693f7e..a2314779c8e687 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2663,12 +2663,34 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
     return false;
   };
 
+  SmallVector<const IntrinsicInst *, 4> FakeUses;
+
+  auto isFakeUse = [&FakeUses](const Instruction *Inst) {
+    if (auto *II = dyn_cast<IntrinsicInst>(Inst);
+        II && II->getIntrinsicID() == Intrinsic::fake_use) {
+      // Record the instruction so it can be preserved when the exit block
+      // is removed. Do not record fake uses that use the result of a PHI
+      // node, since they cannot simply be copied into the predecessor
+      // blocks.
+      // FIXME: If we do want to copy the fake use into the return blocks, we
+      // have to figure out which of the PHI node operands to use for each
+      // copy.
+      if (!isa<PHINode>(II->getOperand(0))) {
+        FakeUses.push_back(II);
+      }
+      return true;
+    }
+
+    return false;
+  };
+
   // Make sure there are no instructions between the first instruction
   // and return.
   const Instruction *BI = BB->getFirstNonPHI();
   // Skip over debug and the bitcast.
   while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
-         isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
+         isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI) ||
+         isFakeUse(BI))
     BI = BI->getNextNode();
   if (BI != RetI)
     return false;
@@ -2677,6 +2699,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
   /// call.
   const Function *F = BB->getParent();
   SmallVector<BasicBlock *, 4> TailCallBBs;
+  SmallVector<CallInst *, 4> CallInsts;
   if (PN) {
     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
       // Look through bitcasts.
@@ -2710,6 +2733,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
             attributesPermitTailCall(F, CI, RetI, *TLI))
           TailCallBBs.push_back(PredBB);
       }
+      // Record the call instruction so we can insert any fake uses
+      // that need to be preserved before it.
+      CallInsts.push_back(CI);
     }
   } else {
     SmallPtrSet<BasicBlock *, 4> VisitedBBs;
@@ -2726,6 +2752,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
               (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
                V == CI->getArgOperand(0))) {
             TailCallBBs.push_back(Pred);
+            // Record the call instruction so we can insert any fake uses
+            // that need to be preserved before it.
+            CallInsts.push_back(CI);
           }
         }
       }
@@ -2752,8 +2781,17 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
   }
 
   // If we eliminated all predecessors of the block, delete the block now.
-  if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
+  if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
+    // Copy the fake uses found in the original return block to all blocks
+    // that contain tail calls.
+    for (auto *CI : CallInsts) {
+      for (auto const *FakeUse : FakeUses) {
+        auto *ClonedInst = FakeUse->clone();
+        ClonedInst->insertBefore(CI);
+      }
+    }
     BB->eraseFromParent();
+  }
 
   return Changed;
 }
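In outline, the change behaves as in this sketch (condensed from the `fake-use-tailcall.mir` test further down; not a literal transcript): fake uses in the common return block are cloned in front of each duplicated tail call, except those that use a PHI result:

```llvm
; Before: fake uses live in the shared return block.
if.then:
  %call = tail call i32 @f0()
  br label %if.end
if.end:
  %t = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
  notail call void (...) @llvm.fake.use(i32 %t) ; uses a PHI: not cloned
  notail call void (...) @llvm.fake.use(i32 %i) ; cloned into predecessors
  ret i32 %t

; After: the return is duplicated into each predecessor, with the cloned
; fake use inserted before the tail call.
if.then:
  notail call void (...) @llvm.fake.use(i32 %i)
  %call = tail call i32 @f0()
  ret i32 %call
```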
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index facc01452d2f12..468949fd4cf498 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -87,7 +87,8 @@ bool DeadMachineInstructionElimImpl::isDead(const MachineInstr *MI) const {
     return false;
 
   // Don't delete frame allocation labels.
-  if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+  if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE ||
+      MI->getOpcode() == TargetOpcode::FAKE_USE)
     return false;
 
   // Don't delete instructions with side effects.
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index 26a8d00e662651..c3c7f48677d5c2 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -406,7 +406,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
 
 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
   if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
-      MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo())
+      MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
+      MI->isFakeUse())
     return false;
 
   // Ignore copies.
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 0d5bf329938781..1cc611107e5984 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -524,7 +524,8 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                             MachineBasicBlock *MBB,
                             MachineFunction *MF,
                             const TargetInstrInfo *TII) {
-  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
+  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
+         MI->isFakeUse();
 }
 
 /// A region of an MBB for scheduling.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 27b8472ddb73d8..5451abcdfcf9d2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1461,6 +1461,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
     updateValueMap(II, ResultReg);
     return true;
   }
+  case Intrinsic::fake_use:
+    // At -O0, we don't need the fake use, so just ignore it.
+    return true;
   case Intrinsic::experimental_stackmap:
     return selectStackmap(II);
   case Intrinsic::experimental_patchpoint_void:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3332c02ec72358..ac1242ff59f164 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2229,6 +2229,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
       report_fatal_error("Do not know how to promote this operator's operand!");
 
     case ISD::BITCAST:    R = PromoteFloatOp_BITCAST(N, OpNo); break;
+    case ISD::FAKE_USE:
+      R = PromoteFloatOp_FAKE_USE(N, OpNo);
+      break;
     case ISD::FCOPYSIGN:  R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
     case ISD::FP_TO_SINT:
     case ISD::FP_TO_UINT:
@@ -2268,6 +2271,13 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
   return DAG.getBitcast(N->getValueType(0), Convert);
 }
 
+SDValue DAGTypeLegalizer::PromoteFloatOp_FAKE_USE(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+  SDValue Op = GetPromotedFloat(N->getOperand(OpNo));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, N->getOperand(0),
+                     Op);
+}
+
 // Promote Operand 1 of FCOPYSIGN.  Operand 0 ought to be handled by
 // PromoteFloatRes_FCOPYSIGN.
 SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
@@ -3138,6 +3148,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
                        "operand!");
 
   case ISD::BITCAST:    Res = SoftPromoteHalfOp_BITCAST(N); break;
+  case ISD::FAKE_USE:
+    Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo);
+    break;
   case ISD::FCOPYSIGN:  Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
@@ -3175,6 +3188,13 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_BITCAST(SDNode *N) {
   return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
 }
 
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+  SDValue Op = GetSoftPromotedHalf(N->getOperand(OpNo));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, N->getOperand(0),
+                     Op);
+}
+
 SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
                                                       unsigned OpNo) {
   assert(OpNo == 1 && "Only Operand 1 must need promotion here");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 93ce9c22af5525..022cb9a4a82cdc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1818,6 +1818,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
   case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
   case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::FAKE_USE:
+    Res = PromoteIntOp_FAKE_USE(N);
+    break;
   case ISD::INSERT_VECTOR_ELT:
     Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);
     break;
@@ -5124,6 +5127,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::BR_CC:             Res = ExpandIntOp_BR_CC(N); break;
   case ISD::BUILD_VECTOR:      Res = ExpandOp_BUILD_VECTOR(N); break;
   case ISD::EXTRACT_ELEMENT:   Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+  case ISD::FAKE_USE:
+    Res = ExpandOp_FAKE_USE(N);
+    break;
   case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
   case ISD::SCALAR_TO_VECTOR:  Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
   case ISD::SPLAT_VECTOR:      Res = ExpandIntOp_SPLAT_VECTOR(N); break;
@@ -5931,6 +5937,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
   return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
 }
 
+// FIXME: We wouldn't need this if clang could promote short integers
+// that are arguments to FAKE_USE.
+SDValue DAGTypeLegalizer::PromoteIntOp_FAKE_USE(SDNode *N) {
+  SDLoc dl(N);
+  SDValue V0 = N->getOperand(0);
+  SDValue V1 = N->getOperand(1);
+  EVT InVT1 = V1.getValueType();
+  SDValue VPromoted =
+      DAG.getNode(ISD::ANY_EXTEND, dl,
+                  TLI.getTypeToTransformTo(*DAG.getContext(), InVT1), V1);
+  return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), V0, VPromoted);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
   SDLoc dl(N);
   SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e08acd36b41d4e..6a3f76ae4b6ded 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -368,6 +368,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
+  SDValue PromoteIntOp_FAKE_USE(SDNode *N);
   SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
   SDValue PromoteIntOp_ScalarOp(SDNode *N);
   SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
@@ -702,6 +703,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
 
   bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo);
@@ -744,6 +746,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
 
   bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_BITCAST(SDNode *N);
+  SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
   SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
@@ -816,6 +819,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N);
   SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
   SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N);
+  SDValue ScalarizeVecOp_FAKE_USE(SDNode *N);
 
   //===--------------------------------------------------------------------===//
   // Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -899,6 +903,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
+  SDValue SplitVecOp_FAKE_USE(SDNode *N);
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo);
@@ -998,6 +1003,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N);
+  SDValue WidenVecOp_FAKE_USE(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
   SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
@@ -1125,6 +1131,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ExpandOp_BITCAST          (SDNode *N);
   SDValue ExpandOp_BUILD_VECTOR     (SDNode *N);
   SDValue ExpandOp_EXTRACT_ELEMENT  (SDNode *N);
+  SDValue ExpandOp_FAKE_USE(SDNode *N);
   SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
   SDValue ExpandOp_NormalStore      (SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a55364ea2c4e5b..b402e823762764 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -403,6 +403,17 @@ SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
   return N->getConstantOperandVal(1) ? Hi : Lo;
 }
 
+// Split the integer operand in two and create a second FAKE_USE node for
+// the other half. The original SDNode is updated in place.
+SDValue DAGTypeLegalizer::ExpandOp_FAKE_USE(SDNode *N) {
+  SDValue Lo, Hi;
+  SDValue Chain = N->getOperand(0);
+  GetExpandedOp(N->getOperand(1), Lo, Hi);
+  SDValue LoUse = DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Lo);
+  DAG.UpdateNodeOperands(N, LoUse, Hi);
+  return SDValue(N, 0);
+}
+
 SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
   // The vector type is legal but the element type needs expansion.
   EVT VecVT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1f6e0097f31ab4..c23c586024ced3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -675,6 +675,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::BITCAST:
     Res = ScalarizeVecOp_BITCAST(N);
     break;
+  case ISD::FAKE_USE:
+    Res = ScalarizeVecOp_FAKE_USE(N);
+    break;
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::SIGN_EXTEND:
@@ -766,6 +769,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
                      N->getValueType(0), Elt);
 }
 
+// Legalize the vector operand of a fake use; it must be <1 x ty>.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FAKE_USE(SDNode *N) {
+  assert(N->getOperand(1).getValueType().getVectorNumElements() == 1 &&
+         "Fake Use: Unexpected vector type!");
+  SDValue Elt = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Elt);
+}
+
 /// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
 /// Do the operation on the element instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
@@ -3046,7 +3057,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::FLDEXP:
     Res = SplitVecOp_FPOpDifferentTypes(N);
     break;
-
+  case ISD::FAKE_USE:
+    Res = SplitVecOp_FAKE_USE(N);
+    break;
   case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::SIGN_EXTEND_VECTOR_INREG:
   case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -3255,6 +3268,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
 }
 
+// Split a FAKE_USE of a vector into FAKE_USEs of its hi and lo parts.
+SDValue DAGTypeLegalizer::SplitVecOp_FAKE_USE(SDNode *N) {
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(1), Lo, Hi);
+  SDValue Chain =
+      DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Lo);
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Hi);
+}
+
 SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
   // For example, i64 = BITCAST v4i16 on alpha.  Typically the vector will
   // end up being split all the way down to individual components.  Convert the
@@ -6046,6 +6068,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
     report_fatal_error("Do not know how to widen this operator's operand!");
 
   case ISD::BITCAST:            Res = WidenVecOp_BITCAST(N); break;
+  case ISD::FAKE_USE:
+    Res = WidenVecOp_FAKE_USE(N);
+    break;
   case ISD::CONCAT_VECTORS:     Res = WidenVecOp_CONCAT_VECTORS(N); break;
   case ISD::INSERT_SUBVECTOR:   Res = WidenVecOp_INSERT_SUBVECTOR(N); break;
   case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
@@ -6388,6 +6413,16 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
   return CreateStackStoreLoad(InOp, VT);
 }
 
+// Vectors with sizes that are not powers of 2 need to be widened to the
+// next largest power of 2. For example, we may get a vector of 3 32-bit
+// integers or of 6 16-bit integers, both of which have to be widened to a
+// 128-bit vector.
+SDValue DAGTypeLegalizer::WidenVecOp_FAKE_USE(SDNode *N) {
+  SDValue WidenedOp = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0),
+                     WidenedOp);
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2d63774c75e372..a3d6f97deee2c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7526,6 +7526,24 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     return;
   }
 
+  case Intrinsic::fake_use: {
+    Value *V = I.getArgOperand(0);
+    SDValue Ops[2];
+    // If this fake use uses an argument that has an empty SDValue, it is a
+    // zero-length array or some other type that does not produce a register,
+    // so do not translate a fake use for it.
+    if (isa<Argument>(V) && !NodeMap[V])
+      return;
+    Ops[0] = getRoot();
+    Ops[1] = getValue(V);
+    // Also, do not translate a fake use with an undef operand, or any other
+    // empty SDValues.
+    if (!Ops[1] || Ops[1].isUndef())
+      return;
+    DAG.setRoot(DAG.getNode(ISD::FAKE_USE, sdl, MVT::Other, Ops));
+    return;
+  }
+
   case Intrinsic::eh_exceptionpointer:
   case Intrinsic::eh_exceptioncode: {
     // Get the exception pointer vreg, copy from it, and resize it to fit.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 20375a0f92b238..22ca6c086bd91a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -429,6 +429,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::UBSANTRAP:                  return "ubsantrap";
   case ISD::LIFETIME_START:             return "lifetime.start";
   case ISD::LIFETIME_END:               return "lifetime.end";
+  case ISD::FAKE_USE:
+    return "fake_use";
   case ISD::PSEUDO_PROBE:
     return "pseudoprobe";
   case ISD::GC_TRANSITION_START:        return "gc_transition.start";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index d629c36bc792e3..762d5cc8b8144a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -712,6 +712,50 @@ static void reportFastISelFailure(MachineFunction &MF,
   LLVM_DEBUG(dbgs() << R.getMsg() << "\n");
 }
 
+// Detect any fake uses that follow a tail call and move them before the tail
+// call. Ignore fake uses that use values that are def'd by or after the tail
+// call.
+static void preserveFakeUses(BasicBlock::iterator Begin,
+                             BasicBlock::iterator End) {
+  BasicBlock::iterator I = End;
+  if (--I == Begin || !isa<ReturnInst>(*I))
+    return;
+  // Detect whether there are any fake uses trailing a (potential) tail call.
+  bool HaveFakeUse = false;
+  bool HaveTailCall = false;
+  do {
+    if (const CallInst *CI = dyn_cast<CallInst>(--I))
+      if (CI->isTailCall()) {
+        HaveTailCall = true;
+        break;
+      }
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      if (II->getIntrinsicID() == Intrinsic::fake_use)
+        HaveFakeUse = true;
+  } while (I != Begin);
+
+  // If we didn't find any tail calls followed by fake uses, we are done.
+  if (!HaveTailCall || !HaveFakeUse)
+    return;
+
+  SmallVector<IntrinsicInst *> FakeUses;
+  // Record the fake uses we found so we can move them to the front of the
+  // tail call. Ignore them if they use a value that is def'd by or after
+  // the tail call.
+  for (BasicBlock::iterator Inst = I; Inst != End; Inst++) {
+    if (IntrinsicInst *FakeUse = dyn_cast<IntrinsicInst>(Inst);
+        FakeUse && FakeUse->getIntrinsicID() == Intrinsic::fake_use) {
+      if (auto UsedDef = dyn_cast<Instruction>(FakeUse->getOperand(0));
+          !UsedDef || UsedDef->getParent() != I->getParent() ||
+          UsedDef->comesBefore(&*I))
+        FakeUses.push_back(FakeUse);
+    }
+  }
+
+  for (auto *Inst : FakeUses)
+    Inst->moveBefore(*Inst->getParent(), I);
+}
+
 void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
                                         BasicBlock::const_iterator End,
                                         bool &HadTailCall) {
@@ -1576,6 +1620,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       FuncInfo->VisitedBBs.insert(LLVMBB);
     }
 
+    // Fake uses that follow tail calls are dropped. To avoid this, move
+    // such fake uses in front of the tail call, provided they don't
+    // use anything def'd by or after the tail call.
+    {
+      BasicBlock::iterator BBStart =
+          const_cast<BasicBlock *>(LLVMBB)->getFirstNonPHI()->getIterator();
+      BasicBlock::iterator BBEnd = const_cast<BasicBlock *>(LLVMBB)->end();
+      preserveFakeUses(BBStart, BBEnd);
+    }
+
     BasicBlock::const_iterator const Begin =
         LLVMBB->getFirstNonPHI()->getIterator();
     BasicBlock::const_iterator const End = LLVMBB->end();
@@ -2352,6 +2406,13 @@ void SelectionDAGISel::Select_UNDEF(SDNode *N) {
   CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
 }
 
+// Use the generic FAKE_USE target opcode. The chain operand
+// must come last, because InstrEmitter::AddOperand() requires it.
+void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
+  CurDAG->SelectNodeTo(N, TargetOpcode::FAKE_USE, N->getValueType(0),
+                       N->getOperand(1), N->getOperand(0));
+}
+
 void SelectionDAGISel::Select_FREEZE(SDNode *N) {
   // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
   // If FREEZE instruction is added later, the code below must be changed as
@@ -3114,6 +3175,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
   case ISD::UNDEF:
     Select_UNDEF(NodeToMatch);
     return;
+  case ISD::FAKE_USE:
+    Select_FAKE_USE(NodeToMatch);
+    return;
   case ISD::FREEZE:
     Select_FREEZE(NodeToMatch);
     return;
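Sketched at the IR level (hypothetical names), `preserveFakeUses` does the following: fake uses trailing a tail call are hoisted above it, provided they do not use anything def'd by or after the call.

```llvm
; Before: the fake use trails the tail call and would be dropped.
  %r = tail call i32 @callee(i32 %x)
  notail call void (...) @llvm.fake.use(i32 %x)
  ret i32 %r

; After: the fake use of %x is moved above the call. A fake use of %r
; would stay put, since %r is def'd by the tail call itself.
  notail call void (...) @llvm.fake.use(i32 %x)
  %r = tail call i32 @callee(i32 %x)
  ret i32 %r
```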
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 47a7f2c9de790f..44d261190d3f70 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -1125,7 +1125,10 @@ Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const {
 const Instruction *
 Instruction::getPrevNonDebugInstruction(bool SkipPseudoOp) const {
   for (const Instruction *I = getPrevNode(); I; I = I->getPrevNode())
-    if (!isa<DbgInfoIntrinsic>(I) && !(SkipPseudoOp && isa<PseudoProbeInst>(I)))
+    if (!isa<DbgInfoIntrinsic>(I) &&
+        !(SkipPseudoOp && isa<PseudoProbeInst>(I)) &&
+        !(isa<IntrinsicInst>(I) &&
+          cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fake_use))
       return I;
   return nullptr;
 }
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 819722566831c6..9561dff73044e0 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5017,6 +5017,7 @@ void Verifier::visitInstruction(Instruction &I) {
                 F->getIntrinsicID() ==
                     Intrinsic::experimental_patchpoint_void ||
                 F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
+                F->getIntrinsicID() == Intrinsic::fake_use ||
                 F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
                 F->getIntrinsicID() == Intrinsic::wasm_rethrow ||
                 IsAttachedCallOperand(F, CBI, i),
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 260879ffaa4f12..eca390edd50422 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -432,6 +432,24 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
     if (MI.isCall())
       FPInstClass = X86II::SpecialFP;
 
+    // A fake_use with a floating point pseudo register argument that is
+    // killed must behave like any other floating point operation and pop
+    // the floating point stack (this is done in handleSpecialFP()).
+    // Fake_use is, however, unusual, in that sometimes its operand is not
+    // killed because a later instruction (probably a return) will use it.
+    // It is this instruction that will pop the stack.
+    // In this scenario we can safely remove the fake_use's operand
+    // (it is live anyway).
+    if (MI.isFakeUse()) {
+      const MachineOperand &MO = MI.getOperand(0);
+      if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
+        if (MO.isKill())
+          FPInstClass = X86II::SpecialFP;
+        else
+          MI.removeOperand(0);
+      }
+    }
+
     if (FPInstClass == X86II::NotFP)
       continue;  // Efficiently ignore non-fp insts!
 
@@ -1735,6 +1753,20 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
     // Don't delete the inline asm!
     return;
   }
+
+  // FAKE_USE must pop its register operand off the stack if it is killed,
+  // because this constitutes the register's last use. If the operand
+  // is not killed, it will have its last use later, so we leave it alone.
+  // In either case we remove the operand so later passes don't see it.
+  case TargetOpcode::FAKE_USE: {
+    assert(MI.getNumExplicitOperands() == 1 &&
+           "FAKE_USE must have exactly one operand");
+    if (MI.getOperand(0).isKill()) {
+      freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
+    }
+    MI.removeOperand(0);
+    return;
+  }
   }
 
   Inst = MBB->erase(Inst);  // Remove the pseudo instruction
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 096c6d1b1fad27..dff95872e307ba 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3846,6 +3846,12 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
 
   struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
     AAMDNodes AATags;
+    // A vector to hold the split components that we want to emit
+    // separate fake uses for.
+    SmallVector<Value *, 4> Components;
+    // A vector to hold all the fake uses of the struct that we are splitting.
+    // Usually there should only be one, but we are handling the general case.
+    SmallVector<Instruction *, 1> FakeUses;
 
     LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
@@ -3870,10 +3876,32 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
           GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
         Load->setAAMetadata(
             AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
+      // Record the load so we can generate a fake use for this aggregate
+      // component.
+      Components.push_back(Load);
 
       Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
       LLVM_DEBUG(dbgs() << "          to: " << *Load << "\n");
     }
+
+    // Stash the fake uses that use the value generated by this instruction.
+    void recordFakeUses(LoadInst &LI) {
+      for (Use &U : LI.uses())
+        if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
+          if (II->getIntrinsicID() == Intrinsic::fake_use)
+            FakeUses.push_back(II);
+    }
+
+    // Replace all fake uses of the aggregate with a series of fake uses, one
+    // for each split component.
+    void emitFakeUses() {
+      for (Instruction *I : FakeUses) {
+        IRB.SetInsertPoint(I);
+        for (auto *V : Components)
+          IRB.CreateIntrinsic(Intrinsic::fake_use, {}, {V});
+        I->eraseFromParent();
+      }
+    }
   };
 
   bool visitLoadInst(LoadInst &LI) {
@@ -3885,8 +3913,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
     LLVM_DEBUG(dbgs() << "    original: " << LI << "\n");
     LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
                             getAdjustedAlignment(&LI, 0), DL, IRB);
+    Splitter.recordFakeUses(LI);
     Value *V = PoisonValue::get(LI.getType());
     Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
+    Splitter.emitFakeUses();
     Visited.erase(&LI);
     LI.replaceAllUsesWith(V);
     LI.eraseFromParent();
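A sketch of the new SROA behavior (hypothetical types and names, mirroring the `.fca` naming the splitter uses): a fake use of a loaded aggregate is replaced by one fake use per split component, so each piece stays visible for debugging.

```llvm
; Before: one fake use of the whole aggregate load.
%agg = load { i32, i32 }, ptr %p
notail call void (...) @llvm.fake.use({ i32, i32 } %agg)

; After splitting: a fake use for each component load.
%agg.fca.0.load = load i32, ptr %p
%agg.fca.1.gep = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
%agg.fca.1.load = load i32, ptr %agg.fca.1.gep
notail call void (...) @llvm.fake.use(i32 %agg.fca.0.load)
notail call void (...) @llvm.fake.use(i32 %agg.fca.1.load)
```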
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 3eac726994ae13..cfcd0c0f60cefe 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -523,6 +523,12 @@ void PruningFunctionCloner::CloneBlock(
   for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
        ++II) {
 
+    // Don't clone fake_use as it may suppress many optimizations
+    // due to inlining, especially SROA.
+    if (auto *IntrInst = dyn_cast<IntrinsicInst>(II))
+      if (IntrInst->getIntrinsicID() == Intrinsic::fake_use)
+        continue;
+
     Instruction *NewInst = cloneInstruction(II);
     NewInst->insertInto(NewBB, NewBB->end());
 
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 3d3b97eca92b3d..632458a0191fe8 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3431,6 +3431,9 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
 
   unsigned Count = 0;
   for (Use &U : llvm::make_early_inc_range(From->uses())) {
+    auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+    if (II && II->getIntrinsicID() == Intrinsic::fake_use)
+      continue;
     if (!Dominates(Root, U))
       continue;
     LLVM_DEBUG(dbgs() << "Replace dominated use of '";
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index adcf161b313b2b..72c433840f91c0 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -78,7 +78,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
       if (SI->isVolatile())
         return false;
     } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
-      if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
+      if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
+          II->getIntrinsicID() != Intrinsic::fake_use)
         return false;
     } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-phi.mir b/llvm/test/CodeGen/MIR/X86/fake-use-phi.mir
new file mode 100644
index 00000000000000..b8571304d870d8
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-phi.mir
@@ -0,0 +1,95 @@
+# RUN: llc < %s -x mir -run-pass=codegenprepare | FileCheck %s --implicit-check-not="llvm.fake.use"
+#
+# When performing return duplication to enable tail call optimization,
+# we clone fake uses that exist in the to-be-eliminated return block
+# into the predecessor blocks. Fake uses of PHI nodes cannot simply be
+# copied, because each copy would need the correct PHI operand for its
+# block. We are currently not able to do this correctly, so we suppress
+# the cloning of such fake uses for now.
+#
+# There should be no fake use of a call result in any of the resulting return
+# blocks.
+
+
+# CHECK: declare void @llvm.fake.use
+
+# Fake uses of `this` should be duplicated into both return blocks.
+# CHECK: if.then:
+# CHECK: @llvm.fake.use({{.*}}this
+# CHECK: if.else:
+# CHECK: @llvm.fake.use({{.*}}this
+
+--- |
+  source_filename = "test.ll"
+
+  %class.a = type { i8 }
+
+  declare void @llvm.fake.use(...)
+  declare i32 @foo(ptr nonnull dereferenceable(1)) local_unnamed_addr
+  declare i32 @bar(ptr nonnull dereferenceable(1)) local_unnamed_addr
+
+  define hidden void @func(ptr nonnull dereferenceable(1) %this) local_unnamed_addr align 2 {
+  entry:
+    %b = getelementptr inbounds %class.a, ptr %this, i64 0, i32 0
+    %0 = load i8, i8* %b, align 1
+    %tobool.not = icmp eq i8 %0, 0
+    br i1 %tobool.not, label %if.else, label %if.then
+
+  if.then:                                          ; preds = %entry
+    %call = tail call i32 @foo(ptr nonnull dereferenceable(1) %this)
+    %call2 = tail call i32 @bar(ptr nonnull dereferenceable(1) %this)
+    br label %if.end
+
+  if.else:                                          ; preds = %entry
+    %call4 = tail call i32 @bar(ptr nonnull dereferenceable(1) %this)
+    %call5 = tail call i32 @foo(ptr nonnull dereferenceable(1) %this)
+    br label %if.end
+
+  if.end:                                           ; preds = %if.else, %if.then
+    %call4.sink = phi i32 [ %call4, %if.else ], [ %call, %if.then ]
+    notail call void (...) @llvm.fake.use(i32 %call4.sink)
+    notail call void (...) @llvm.fake.use(ptr nonnull %this)
+    ret void
+  }
+
+...
+---
+name:            func
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+
+...
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-scheduler.mir b/llvm/test/CodeGen/MIR/X86/fake-use-scheduler.mir
new file mode 100644
index 00000000000000..bb2136e50d5b7e
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-scheduler.mir
@@ -0,0 +1,115 @@
+# Prevent the machine scheduler from moving instructions past FAKE_USE.
+# RUN: llc -run-pass machine-scheduler -o - %s | FileCheck %s
+#
+# We make sure that, beginning with the first FAKE_USE instruction,
+# no changes to the sequence of instructions are undertaken by the
+# scheduler. We don't bother to check that the order of the FAKE_USEs
+# remains the same. They should, but it is irrelevant.
+#
+# CHECK:      bb.{{.*}}:
+# CHECK:      FAKE_USE
+# CHECK-NEXT: FAKE_USE
+# CHECK-NEXT: FAKE_USE
+# CHECK-NEXT: FAKE_USE
+# CHECK-NEXT: COPY
+# CHECK-NEXT: RET
+#
+--- |
+  @glb = common dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 16
+
+  ; Function Attrs: nounwind uwtable
+  define dso_local i64 @foo(i32* %p) local_unnamed_addr {
+  entry:
+    %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @glb, i64 0, i64 0), align 16, !tbaa !2
+    store i32 %0, i32* %p, align 4, !tbaa !2
+    %conv = sext i32 %0 to i64
+    %1 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @glb, i64 0, i64 1), align 4, !tbaa !2
+    %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
+    store i32 %1, i32* %arrayidx1, align 4, !tbaa !2
+    %conv2 = sext i32 %1 to i64
+    %add3 = add nsw i64 %conv2, %conv
+    notail call void (...) @llvm.fake.use(i64 %add3)
+    notail call void (...) @llvm.fake.use(i32 %1)
+    notail call void (...) @llvm.fake.use(i32 %0)
+    notail call void (...) @llvm.fake.use(i32* %p)
+    ret i64 %add3
+  }
+
+  ; Function Attrs: nounwind
+  declare void @llvm.fake.use(...) #1
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+
+
+  !llvm.module.flags = !{!0}
+  !llvm.ident = !{!1}
+
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{!"clang version 9.0.0"}
+  !2 = !{!3, !3, i64 0}
+  !3 = !{!"int", !4, i64 0}
+  !4 = !{!"omnipotent char", !5, i64 0}
+  !5 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            foo
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64_with_sub_8bit, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr64_with_sub_8bit, preferred-register: '' }
+  - { id: 4, class: gr32, preferred-register: '' }
+  - { id: 5, class: gr64, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+body:             |
+  bb.0.entry:
+    liveins: $rdi
+
+    %0:gr64 = COPY $rdi
+    %1:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb, $noreg :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @glb, i64 0, i64 0)`, align 16, !tbaa !2)
+    MOV32mr %0, 1, $noreg, 0, $noreg, %1.sub_32bit :: (store 4 into %ir.p, !tbaa !2)
+    %3:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb + 4, $noreg :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @glb, i64 0, i64 1)`, !tbaa !2)
+    MOV32mr %0, 1, $noreg, 4, $noreg, %3.sub_32bit :: (store 4 into %ir.arrayidx1, !tbaa !2)
+    %5:gr64 = COPY %3
+    %5:gr64 = nsw ADD64rr %5, %1, implicit-def dead $eflags
+    FAKE_USE %5
+    FAKE_USE %3.sub_32bit
+    FAKE_USE %1.sub_32bit
+    FAKE_USE %0
+    $rax = COPY %5
+    RET 0, killed $rax
+
+...
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir b/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
new file mode 100644
index 00000000000000..89d3854ac95f88
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
@@ -0,0 +1,106 @@
+# In certain cases CodeGenPrepare folds a return instruction into
+# the return block's predecessor blocks and subsequently deletes the return block.
+# The purpose of this is to enable tail call optimization in the predecessor blocks.
+# Removal of the return block also removes fake use instructions that were present
+# in the return block, potentially causing debug information to be lost.
+#
+# The fix is to clone any fake use instructions that are not dominated by definitions
+# in the return block itself into the predecessor blocks. This test ensures that we do so.
+#
+# Generated from the following source with
+# clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-before=codegenprepare -o test.mir test.c
+#
+# extern int f0();
+# extern int f1();
+#
+# int foo(int i) {
+#   int temp = i;
+#   if (temp == 0)
+#     temp = f0();
+#   else
+#     temp = f1();
+#   return temp;
+# }
+#
+# RUN: llc -run-pass=codegenprepare -o - %s | FileCheck %s
+#
+# CHECK:      define{{.*}}foo
+# CHECK:      if.then:
+# CHECK-NEXT: call{{.*}}fake.use(i32 %i)
+# CHECK-NEXT: tail call i32{{.*}}@f0
+# CHECK-NEXT: ret
+# CHECK:      if.else:
+# CHECK-NEXT: call{{.*}}fake.use(i32 %i)
+# CHECK-NEXT: tail call i32{{.*}}@f1
+# CHECK-NEXT: ret
+
+--- |
+  define hidden i32 @foo(i32 %i) local_unnamed_addr {
+  entry:
+    %cmp = icmp eq i32 %i, 0
+    br i1 %cmp, label %if.then, label %if.else
+
+  if.then:
+    %call = tail call i32 (...) @f0()
+    br label %if.end
+
+  if.else:
+    %call1 = tail call i32 (...) @f1()
+    br label %if.end
+
+  if.end:
+    %temp.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
+    notail call void (...) @llvm.fake.use(i32 %temp.0)
+    notail call void (...) @llvm.fake.use(i32 %i)
+    ret i32 %temp.0
+  }
+  declare i32 @f0(...) local_unnamed_addr
+  declare i32 @f1(...) local_unnamed_addr
+  declare void @llvm.fake.use(...)
+
+  !llvm.module.flags = !{!0}
+  !llvm.ident = !{!1}
+
+  !0 = !{i32 1, !"wchar_size", i32 2}
+  !1 = !{!"clang version 10.0.0"}
+
+...
+---
+name:            foo
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+
+...
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-zero-length.ll b/llvm/test/CodeGen/MIR/X86/fake-use-zero-length.ll
new file mode 100644
index 00000000000000..b23d74814882ab
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-zero-length.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -stop-after=finalize-isel | FileCheck %s --implicit-check-not=FAKE_USE
+;
+; Make sure SelectionDAG does not crash when handling fake uses of zero-length
+; arrays and structs. Check also that such fake uses are not propagated into
+; FAKE_USE machine instructions.
+;
+; Generated from the following source with
+; clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-after=safe-stack -o test.mir test.cpp
+;
+; int main ()
+; { int array[0]; }
+;
+;
+; CHECK: liveins: $[[IN_REG:[a-zA-Z0-9]+]]
+; CHECK: %[[IN_VREG:[a-zA-Z0-9]+]]:gr32 = COPY $[[IN_REG]]
+; CHECK: FAKE_USE %[[IN_VREG]]
+
+source_filename = "test.ll"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define hidden i32 @main([0 x i32] %zero, [1 x i32] %one) local_unnamed_addr {
+entry:
+  notail call void (...) @bar([0 x i32] %zero)
+  notail call void (...) @baz([1 x i32] %one)
+  notail call void (...) @llvm.fake.use([0 x i32] %zero)
+  notail call void (...) @llvm.fake.use([1 x i32] %one)
+  ret i32 0
+}
+
+declare void @bar([0 x i32] %a)
+declare void @baz([1 x i32] %a)
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 10.0.0"}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/fake-use-hpfloat.ll b/llvm/test/CodeGen/X86/fake-use-hpfloat.ll
new file mode 100644
index 00000000000000..9ec53fb9558c55
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-hpfloat.ll
@@ -0,0 +1,17 @@
+; Check that a fake use of a half-precision float does not trigger an assert
+; in the DAG legalizer; this exercises the changes to half float promotion.
+; RUN: llc -O2 -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
+;
+; CHECK:      bb.0.entry:
+; CHECK-NEXT: %0:fr16 = FsFLD0SH
+; CHECK-NEXT: FAKE_USE killed %0
+;
+target triple = "x86_64-unknown-unknown"
+
+define void @_Z6doTestv() local_unnamed_addr {
+entry:
+  tail call void (...) @llvm.fake.use(half 0xH0000)
+  ret void
+}
+
+declare void @llvm.fake.use(...)
diff --git a/llvm/test/CodeGen/X86/fake-use-ld.ll b/llvm/test/CodeGen/X86/fake-use-ld.ll
new file mode 100644
index 00000000000000..90ecff6dd59680
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-ld.ll
@@ -0,0 +1,51 @@
+; RUN: llc -O0 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; Checks that fake uses of the FP stack do not cause a crash.
+;
+; /*******************************************************************/
+; extern long double foo(long double, long double, long double);
+;
+; long double actual(long double p1, long double p2, long double p3) {
+;   return foo(p1, p2, p3);
+; }
+; /*******************************************************************/
+
+define x86_fp80 @actual(x86_fp80 %p1, x86_fp80 %p2, x86_fp80 %p3) {
+;
+; CHECK: actual
+;
+entry:
+  %p1.addr = alloca x86_fp80, align 16
+  %p2.addr = alloca x86_fp80, align 16
+  %p3.addr = alloca x86_fp80, align 16
+  store x86_fp80 %p1, ptr %p1.addr, align 16
+  store x86_fp80 %p2, ptr %p2.addr, align 16
+  store x86_fp80 %p3, ptr %p3.addr, align 16
+  %0 = load x86_fp80, ptr %p1.addr, align 16
+  %1 = load x86_fp80, ptr %p2.addr, align 16
+  %2 = load x86_fp80, ptr %p3.addr, align 16
+;
+; CHECK: callq{{.*}}foo
+;
+  %3 = call x86_fp80 @foo(x86_fp80 %0, x86_fp80 %1, x86_fp80 %2)
+  %4 = load x86_fp80, ptr %p1.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %4)
+  %5 = load x86_fp80, ptr %p2.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %5)
+  %6 = load x86_fp80, ptr %p3.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %6)
+;
+; CHECK: ret
+;
+  ret x86_fp80 %3
+}
+
+declare x86_fp80 @foo(x86_fp80, x86_fp80, x86_fp80)
+
+declare void @llvm.fake.use(...)
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.9.0"}
diff --git a/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll b/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
new file mode 100644
index 00000000000000..06b4aae48447cc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O2 -o - \
+; RUN:   | FileCheck %s --implicit-check-not=TAILCALL
+; Generated with: clang -emit-llvm -O2 -S -fextend-lifetimes test.cpp -o -
+; =========== test.cpp ===============
+; extern int bar(int);
+; int foo1(int i)
+; {
+;     return bar(i);
+; }
+; =========== test.cpp ===============
+
+; CHECK: TAILCALL
+
+; ModuleID = 'test.cpp'
+source_filename = "test.cpp"
+
+define i32 @_Z4foo1i(i32 %i) local_unnamed_addr {
+entry:
+  %call = tail call i32 @_Z3bari(i32 %i)
+  tail call void (...) @llvm.fake.use(i32 %i)
+  ret i32 %call
+}
+
+declare i32 @_Z3bari(i32) local_unnamed_addr
+
+declare void @llvm.fake.use(...)
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 5.0.1"}
diff --git a/llvm/test/CodeGen/X86/fake-use-split-ret.ll b/llvm/test/CodeGen/X86/fake-use-split-ret.ll
new file mode 100644
index 00000000000000..eff11105723142
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-split-ret.ll
@@ -0,0 +1,53 @@
+; RUN: opt -mtriple=x86_64-unknown-unknown -S -codegenprepare <%s -o - | FileCheck %s
+;
+; Ensure return instruction splitting ignores fake uses; see the sketch below.
+;
+; IR Generated with clang -O2 -S -emit-llvm -fextend-lifetimes test.cpp
+;
+;// test.cpp
+;extern int bar(int);
+;
+;int foo2(int i)
+;{
+;  --i;
+;  if (i <= 0)
+;    return -1;
+;  return bar(i);
+;}
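+;
+; As a rough sketch (illustrative; compare the CHECK lines below), the block
+;
+;   cleanup:
+;     %retval.0 = phi i32 [ %call, %if.end ], [ -1, %entry ]
+;     tail call void (...) @llvm.fake.use(i32 %dec)
+;     ret i32 %retval.0
+;
+; should be split so that %if.end ends in "ret i32 %call", letting the call to
+; @_Z3bari become a true tail call, while the %entry path still returns -1;
+; the intervening fake use must not prevent this.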
+
+; ModuleID = 'test.cpp'
+source_filename = "test.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+declare i32 @_Z3bari(i32) local_unnamed_addr
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+; Function Attrs: nounwind sspstrong uwtable
+define i32 @_Z4foo2i(i32 %i) local_unnamed_addr {
+entry:
+  %dec = add nsw i32 %i, -1
+  %cmp = icmp slt i32 %i, 2
+  br i1 %cmp, label %cleanup, label %if.end
+
+if.end:                                           ; preds = %entry
+  %call = tail call i32 @_Z3bari(i32 %dec)
+; CHECK: ret i32 %call
+  br label %cleanup
+
+cleanup:                                          ; preds = %entry, %if.end
+; CHECK: cleanup:
+  %retval.0 = phi i32 [ %call, %if.end ], [ -1, %entry ]
+  tail call void (...) @llvm.fake.use(i32 %dec)
+; CHECK: ret i32 -1
+  ret i32 %retval.0
+}
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 7.0.0"}
diff --git a/llvm/test/CodeGen/X86/fake-use-sroa.ll b/llvm/test/CodeGen/X86/fake-use-sroa.ll
new file mode 100644
index 00000000000000..883a48fdd66954
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-sroa.ll
@@ -0,0 +1,54 @@
+; RUN: opt -S -passes=sroa %s | FileCheck %s
+; With fake use intrinsics generated for small aggregates, check that when
+; SROA slices the aggregate, we generate individual fake use intrinsics for
+; the individual values, as sketched below.
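+;
+; As a rough sketch (illustrative value names, not verbatim SROA output), the
+; aggregate fake use in @foo below:
+;
+;   %fake.use = load %struct.s, ptr %S
+;   notail call void (...) @llvm.fake.use(%struct.s %fake.use)
+;
+; is expected to become one fake use per slice, along the lines of:
+;
+;   %S.sroa.0 = trunc i64 %S.coerce to i32
+;   %S.sroa.1.shift = lshr i64 %S.coerce, 32
+;   %S.sroa.1 = trunc i64 %S.sroa.1.shift to i32
+;   notail call void (...) @llvm.fake.use(i32 %S.sroa.0)
+;   notail call void (...) @llvm.fake.use(i32 %S.sroa.1)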
+
+; Generated from the following source:
+; struct s {
+;   int i;
+;   int j;
+; };
+;
+; void foo(struct s S) {
+; }
+;
+; void bar() {
+;   int arr[2] = {5, 6};
+; }
+;
+%struct.s = type { i32, i32 }
+@__const.bar.arr = private unnamed_addr constant [2 x i32] [i32 5, i32 6], align 4
+
+; A small struct passed as parameter
+; CHECK-LABEL: define{{.*}}foo
+; CHECK:       %[[SLICE1:[^ ]+]] = trunc i64
+; CHECK:       %[[SLICE2:[^ ]+]] = trunc i64
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[SLICE1]])
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[SLICE2]])
+define dso_local void @foo(i64 %S.coerce) {
+entry:
+  %S = alloca %struct.s, align 4
+  store i64 %S.coerce, ptr %S, align 4
+  %fake.use = load %struct.s, ptr %S, align 4
+  notail call void (...) @llvm.fake.use(%struct.s %fake.use)
+  ret void
+}
+
+declare void @llvm.fake.use(...)
+
+; A local variable with a small array type.
+; CHECK-LABEL: define{{.*}}bar
+; CHECK:       %[[ARRAYSLICE1:[^ ]+]] = load
+; CHECK:       %[[ARRAYSLICE2:[^ ]+]] = load
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[ARRAYSLICE1]])
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[ARRAYSLICE2]])
+define dso_local void @bar() {
+entry:
+  %arr = alloca [2 x i32], align 4
+  call void @llvm.memcpy.p0i8.p0i8.i64(ptr align 4 %arr, ptr align 4 bitcast (ptr @__const.bar.arr to ptr), i64 8, i1 false)
+  %fake.use = load [2 x i32], ptr %arr, align 4
+  notail call void (...) @llvm.fake.use([2 x i32] %fake.use)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/X86/fake-use-suppress-load.ll b/llvm/test/CodeGen/X86/fake-use-suppress-load.ll
new file mode 100644
index 00000000000000..b49091333b0174
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-suppress-load.ll
@@ -0,0 +1,23 @@
+; Check that we suppress redundant loads feeding into fake uses; a value that
+; has been spilled should not be reloaded solely for the sake of a fake use.
+; RUN: llc -filetype=asm -o - %s --mtriple=x86_64-unknown-unknown | FileCheck %s
+; The triple is pinned because the Windows ABI works differently: there is no
+; offset of the form the patterns below check for.
+;
+; Look for the spill
+; CHECK:      movq %r{{[a-z]+,}} -{{[0-9]+\(%rsp\)}}
+; CHECK-NOT:  movq -{{[0-9]+\(%rsp\)}}, %r{{[a-z]+}}
+
+define dso_local i32 @f(ptr %p) local_unnamed_addr {
+entry:
+  call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"() #1, !srcloc !2
+  notail call void (...) @llvm.fake.use(ptr %p)
+  ret i32 4
+}
+
+declare void @llvm.fake.use(...) #1
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0"}
+!2 = !{i32 -2147471544}
diff --git a/llvm/test/CodeGen/X86/fake-use-tailcall.ll b/llvm/test/CodeGen/X86/fake-use-tailcall.ll
new file mode 100644
index 00000000000000..4aaea6c5eeb24d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-tailcall.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -stop-after=finalize-isel -O2 - | FileCheck %s --implicit-check-not FAKE_USE
+; Fake uses following tail calls should be pulled in front of the TCRETURN
+; instruction. Fake uses of values defined by the tail call, or defined after
+; it, should be suppressed, as sketched below.
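+;
+; As a rough sketch of the expected MIR (illustrative; cf. the CHECK lines
+; below):
+;
+;   %0:gr32 = COPY $edi    ; the incoming argument %v
+;   FAKE_USE %0            ; fake use of %v, placed before the tail call
+;   TCRETURN ...           ; the tail call to @_Z3fooi
+;
+; with no FAKE_USE emitted for %call or %mul, since those values are defined
+; by the tail call or after it.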
+
+; CHECK: body:
+; CHECK: bb.0.{{.*}}:
+; CHECK: %0:{{.*}}= COPY
+; CHECK: FAKE_USE %0
+; CHECK: TCRETURN
+
+define void @bar(i32 %v) {
+entry:
+  %call = tail call i32 @_Z3fooi(i32 %v)
+  %mul = mul nsw i32 %call, 3
+  notail call void (...) @llvm.fake.use(i32 %mul)
+  notail call void (...) @llvm.fake.use(i32 %call)
+  notail call void (...) @llvm.fake.use(i32 %v)
+  ret void
+}
+
+declare i32 @_Z3fooi(i32) local_unnamed_addr
+declare void @llvm.fake.use(...)
diff --git a/llvm/test/CodeGen/X86/fake-use-vector.ll b/llvm/test/CodeGen/X86/fake-use-vector.ll
new file mode 100644
index 00000000000000..699f3607da0af6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-vector.ll
@@ -0,0 +1,45 @@
+; Check that a fake use of a one-element vector does not trigger an assert in
+; the DAG legalizer.
+; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
+;
+; ModuleID = 't2.cpp'
+; source_filename = "t2.cpp"
+; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+;
+; Check that we get past ISel and generate FAKE_USE machine instructions for
+; one-element vectors.
+;
+; CHECK:       bb.0.entry:
+; CHECK-DAG:   %1:gr64 = COPY $rdi
+; CHECK-DAG:   %0:vr128 = COPY $xmm0
+; CHECK:       %2:vr64 =
+; CHECK-DAG:   FAKE_USE %1
+; CHECK-DAG:   FAKE_USE %0
+; CHECK:       RET
+
+
+target triple = "x86_64-unknown-unknown"
+
+; Function Attrs: nounwind sspstrong uwtable
+define <4 x float> @_Z3runDv4_fDv1_x(<4 x float> %r, i64 %b.coerce) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i64> undef, i64 %b.coerce, i32 0
+  %1 = bitcast i64 %b.coerce to x86_mmx
+  %2 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %r, x86_mmx %1)
+  tail call void (...) @llvm.fake.use(<1 x i64> %0)
+  tail call void (...) @llvm.fake.use(<4 x float> %r)
+  ret <4 x float> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+attributes #0 = { "target-cpu"="btver2" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 5.0.0"}
diff --git a/llvm/test/CodeGen/X86/fake-use-vector2.ll b/llvm/test/CodeGen/X86/fake-use-vector2.ll
new file mode 100644
index 00000000000000..08d50504a81b8d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-vector2.ll
@@ -0,0 +1,33 @@
+; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
+;
+; Make sure we can split vectors that are used as operands of FAKE_USE.
+
+; Generated from:
+;
+; typedef long __attribute__((ext_vector_type(8))) long8;
+; void test0() { long8 id208 {0, 1, 2, 3, 4, 5, 6, 7}; }
+
+; ModuleID = 't5.cpp'
+source_filename = "t5.cpp"
+
+
+; CHECK:     %0:vr256 = VMOV
+; CHECK:     %1:vr256 = VMOV
+; CHECK-DAG: FAKE_USE killed %1
+; CHECK-DAG: FAKE_USE killed %0
+; CHECK:     RET
+define void @_Z5test0v() local_unnamed_addr #0 {
+entry:
+  tail call void (...) @llvm.fake.use(<8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>) #1
+  ret void
+}
+
+declare void @llvm.fake.use(...)
+
+attributes #0 = { "target-cpu"="btver2" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 5.0.0"}
diff --git a/llvm/test/DebugInfo/X86/Inputs/check-fake-use.py b/llvm/test/DebugInfo/X86/Inputs/check-fake-use.py
new file mode 100644
index 00000000000000..3ac6f7eb828cce
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/Inputs/check-fake-use.py
@@ -0,0 +1,100 @@
+# Parses dwarfdump's output to determine whether the location list for the
+# parameter "b" covers the whole function. The script is written in the form
+# of a state machine and expects dwarfdump's output to adhere to a certain
+# order:
+# 1) The .debug_info section must appear before the .debug_loc section.
+# 2) The DW_AT_location attribute must appear before the parameter's name in the
+#    formal parameter DIE.
+#
+import re
+import sys
+
+DebugInfoPattern = r"\.debug_info contents:"
+SubprogramPattern = r"^0x[0-9a-f]+:\s+DW_TAG_subprogram"
+HighPCPattern = r"DW_AT_high_pc.*0x([0-9a-f]+)"
+FormalPattern = r"^0x[0-9a-f]+:\s+DW_TAG_formal_parameter"
+LocationPattern = r"DW_AT_location\s+\[DW_FORM_sec_offset\].*0x([a-f0-9]+)"
+DebugLocPattern = r'\[0x([a-f0-9]+),\s+0x([a-f0-9]+)\) ".text":'
+
+# States
+LookingForDebugInfo = 0
+LookingForSubProgram = LookingForDebugInfo + 1  # 1
+LookingForHighPC = LookingForSubProgram + 1  # 2
+LookingForFormal = LookingForHighPC + 1  # 3
+LookingForLocation = LookingForFormal + 1  # 4
+DebugLocations = LookingForLocation + 1  # 5
+AllDone = DebugLocations + 1  # 6
+
+# For each state, the state table contains 3-item sublists with the following
+# entries:
+# 1) The regex pattern we use in each state.
+# 2) The state we enter when we have a successful match for the current pattern.
+# 3) The state we enter when we do not have a successful match for the
+#    current pattern.
+StateTable = [
+    # LookingForDebugInfo
+    [DebugInfoPattern, LookingForSubProgram, LookingForDebugInfo],
+    # LookingForSubProgram
+    [SubprogramPattern, LookingForHighPC, LookingForSubProgram],
+    # LookingForHighPC
+    [HighPCPattern, LookingForFormal, LookingForHighPC],
+    # LookingForFormal
+    [FormalPattern, LookingForLocation, LookingForFormal],
+    # LookingForLocation
+    [LocationPattern, DebugLocations, LookingForFormal],
+    # DebugLocations
+    [DebugLocPattern, DebugLocations, AllDone],
+    # AllDone
+    [None, AllDone, AllDone],
+]
+
+# Symbolic indices
+StatePattern = 0
+NextState = 1
+FailState = 2
+
+State = LookingForDebugInfo
+FirstBeginOffset = -1
+
+# Read output from the file provided as a command-line argument.
+with open(sys.argv[1], "r") as dwarf_dump_file:
+    for line in dwarf_dump_file:
+        if State == AllDone:
+            break
+        Pattern = StateTable[State][StatePattern]
+        # print("State: %d - Searching '%s' for '%s'" % (State, line, Pattern))
+        m = re.search(Pattern, line)
+        if m:
+            # Match. Depending on the state, we extract various values.
+            if State == LookingForHighPC:
+                HighPC = int(m.group(1), 16)
+            elif State == DebugLocations:
+                # Extract the range values
+                if FirstBeginOffset == -1:
+                    FirstBeginOffset = int(m.group(1), 16)
+                    # print("FirstBeginOffset set to %d" % FirstBeginOffset)
+                EndOffset = int(m.group(2), 16)
+                # print("EndOffset set to %d" % EndOffset)
+            State = StateTable[State][NextState]
+        else:
+            State = StateTable[State][FailState]
+
+Success = True
+
+# Check that the first entry starts at 0 and that the last ending address
+# in our location list is close to the high PC of the subprogram.
+if State != AllDone:
+    print("Error in expected sequence of DWARF information:")
+    print(" State = %d\n" % State)
+    Success = False
+elif FirstBeginOffset == -1:
+    print("Location list for 'b' not found, did the debug info format change?")
+    Success = False
+elif FirstBeginOffset != 0 or abs(EndOffset - HighPC) > 16:
+    print("Location list for 'b' does not cover the whole function:")
+    print(
+        "Location starts at 0x%x, ends at 0x%x, HighPC = 0x%x"
+        % (FirstBeginOffset, EndOffset, HighPC)
+    )
+    Success = False
+
+sys.exit(not Success)
diff --git a/llvm/test/DebugInfo/X86/fake-use.ll b/llvm/test/DebugInfo/X86/fake-use.ll
new file mode 100644
index 00000000000000..3c4bef1cd7e75c
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/fake-use.ll
@@ -0,0 +1,98 @@
+; REQUIRES: object-emission
+
+; Make sure the fake use of 'b' at the end of 'foo' causes location information for 'b'
+; to extend all the way to the end of the function.
+
+; RUN: %llc_dwarf -O2 -filetype=obj -dwarf-linkage-names=Abstract < %s | llvm-dwarfdump -v - -o %t
+; RUN: %python %p/Inputs/check-fake-use.py %t
+
+; Generated with:
+; clang -O2 -g -S -emit-llvm -fextend-this-ptr fake-use.c
+;
+; int glob[10];
+; extern void bar();
+;
+; int foo(int b, int i)
+; {
+;    int loc = glob[i] * 2;
+;    if (b) {
+;      glob[2] = loc;
+;      bar();
+;    }
+;    return loc;
+; }
+;
+; ModuleID = 't2.c'
+source_filename = "t2.c"
+
+@glob = common local_unnamed_addr global [10 x i32] zeroinitializer, align 16, !dbg !0
+
+; Function Attrs: nounwind sspstrong uwtable
+define i32 @foo(i32 %b, i32 %i) local_unnamed_addr !dbg !13 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !17, metadata !20), !dbg !21
+  %idxprom = sext i32 %i to i64, !dbg !22
+  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @glob, i64 0, i64 %idxprom, !dbg !22
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !23
+  %mul = shl nsw i32 %0, 1, !dbg !22
+  %tobool = icmp eq i32 %b, 0, !dbg !27
+  br i1 %tobool, label %if.end, label %if.then, !dbg !29
+
+if.then:                                          ; preds = %entry
+  store i32 %mul, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @glob, i64 0, i64 2), align 8, !dbg !30, !tbaa !23
+  tail call void (...) @bar() #2, !dbg !32
+  br label %if.end, !dbg !33
+
+if.end:                                           ; preds = %entry, %if.then
+  tail call void (...) @llvm.fake.use(i32 %b), !dbg !34
+  ret i32 %mul, !dbg !35
+}
+
+declare void @bar(...) local_unnamed_addr
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DIGlobalVariableExpression(var: !DIGlobalVariable(name: "glob", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true), expr: !DIExpression())
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
+!2 = !DIFile(filename: "t2.c", directory: "/")
+!3 = !{}
+!4 = !{!0}
+!5 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 320, align: 32, elements: !7)
+!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !{!8}
+!8 = !DISubrange(count: 10)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 4.0.0"}
+!13 = distinct !DISubprogram(name: "foo", scope: !2, file: !2, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !16)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!6, !6, !6}
+!16 = !{!17, !18, !19}
+!17 = !DILocalVariable(name: "b", arg: 1, scope: !13, file: !2, line: 4, type: !6)
+!18 = !DILocalVariable(name: "i", arg: 2, scope: !13, file: !2, line: 4, type: !6)
+!19 = !DILocalVariable(name: "loc", scope: !13, file: !2, line: 6, type: !6)
+!20 = !DIExpression()
+!21 = !DILocation(line: 4, scope: !13)
+!22 = !DILocation(line: 6, scope: !13)
+!23 = !{!24, !24, i64 0}
+!24 = !{!"int", !25, i64 0}
+!25 = !{!"omnipotent char", !26, i64 0}
+!26 = !{!"Simple C/C++ TBAA"}
+!27 = !DILocation(line: 7, scope: !28)
+!28 = distinct !DILexicalBlock(scope: !13, file: !2, line: 7)
+!29 = !DILocation(line: 7, scope: !13)
+!30 = !DILocation(line: 8, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !28, file: !2, line: 7)
+!32 = !DILocation(line: 9, scope: !31)
+!33 = !DILocation(line: 10, scope: !31)
+!34 = !DILocation(line: 12, scope: !13)
+!35 = !DILocation(line: 11, scope: !13)
diff --git a/llvm/test/Transforms/GVN/fake-use-constprop.ll b/llvm/test/Transforms/GVN/fake-use-constprop.ll
new file mode 100644
index 00000000000000..11ed310083db17
--- /dev/null
+++ b/llvm/test/Transforms/GVN/fake-use-constprop.ll
@@ -0,0 +1,69 @@
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+;
+; The Global Value Numbering pass (GVN) propagates values that branch
+; conditions prove to be constant into the dominated basic blocks, replacing
+; all uses in those blocks. However, we don't want the constant propagated
+; into fake.use intrinsics, since this would render the intrinsic useless
+; with respect to keeping the variable live up until the fake.use.
+; This test checks that we don't generate any fake.uses with constant 0; see
+; the sketch below.
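+;
+; As a rough sketch (matching the IR below): given
+;
+;   %conv = fptosi float %f to i8
+;   %tobool3 = icmp eq i8 %conv, 0
+;   br i1 %tobool3, label %if.end, label %lab
+;
+; GVN knows that %conv is 0 inside %if.end, so the ordinary call may become
+; @bees(i8 0), but the fake use must keep referring to %conv:
+;
+;   tail call void (...) @bees(i8 0)
+;   tail call void (...) @llvm.fake.use(i8 %conv)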
+;
+; Reduced from the following test case, generated with clang -O2 -S -emit-llvm -fextend-lifetimes test.c
+;
+; extern void func1();
+; extern int bar();
+; extern void baz(int);
+;
+; int foo(int i, float f, int *punused)
+; {
+;   int j = 3*i;
+;   if (j > 0) {
+;     int m = bar(i);
+;     if (m) {
+;       char b = f;
+;       baz(b);
+;       if (b)
+;         goto lab;
+;       func1();
+;     }
+; lab:
+;     func1();
+;   }
+;   return 1;
+; }
+
+;; GVN should propagate a constant value through to a regular call, but not to
+;; a fake use, which should continue to track the original value.
+; CHECK: %[[CONV_VAR:[a-zA-Z0-9]+]] = fptosi
+; CHECK: call {{.+}} @bees(i8 0)
+; CHECK: call {{.+}} @llvm.fake.use(i8 %[[CONV_VAR]])
+
+define i32 @foo(float %f) {
+  %conv = fptosi float %f to i8
+  %tobool3 = icmp eq i8 %conv, 0
+  br i1 %tobool3, label %if.end, label %lab
+
+if.end:
+  tail call void (...) @bees(i8 %conv)
+  tail call void (...) @llvm.fake.use(i8 %conv)
+  br label %lab
+
+lab:
+  ret i32 1
+}
+
+declare i32 @bar(...)
+
+declare void @baz(i32)
+
+declare void @bees(i32)
+
+declare void @func1(...)
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.9.0"}



More information about the llvm-commits mailing list