[llvm] 3da1a96 - [Statepoints] Support lowering gc relocations to virtual registers

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 25 14:26:20 PDT 2020


Author: Philip Reames
Date: 2020-07-25T14:26:05-07:00
New Revision: 3da1a9634eb9d92b5ffa2571215c350a9641d07b

URL: https://github.com/llvm/llvm-project/commit/3da1a9634eb9d92b5ffa2571215c350a9641d07b
DIFF: https://github.com/llvm/llvm-project/commit/3da1a9634eb9d92b5ffa2571215c350a9641d07b.diff

LOG: [Statepoints] Support lowering gc relocations to virtual registers

(Disabled under flag for the moment)

This is part of a larger project wherein we are finally integrating lowering of gc live operands with the register allocator.  Today, we force spill all operands in SelectionDAG.  The code to do so is distinctly non-optimal.  The approach this patch is working towards is to instead lower the relocations directly into the MI form, and let the register allocator pick which ones get spilled and which stack slots they get spilled to.  In terms of performance, the latter part is actually more important as it avoids redundant shuffling of values between stack slots.

This particular change adds ISEL support to produce the variadic def STATEPOINT form required by the above.  In particular, the first N are lowered to variadic tied def/use pairs.  So new statepoint looks like this:
reloc1,reloc2,... = STATEPOINT ..., base1, derived1<tied-def0>, base2, derived2<tied-def1>, ...

N is limited by the maximal number of tied registers a machine instruction can have (15 at the moment).

The current patch is restricted to handling relocations within a single basic block.  Cross block relocations (e.g. invokes) are handled via the legacy mechanism.  This restriction will be relaxed in future patches.

Patch By: dantrushin
Differential Revision: https://reviews.llvm.org/D81648

Added: 
    llvm/test/CodeGen/X86/statepoint-vreg.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
    llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
    llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
    llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
    llvm/lib/CodeGen/TargetLoweringBase.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 0e4e99214aa2..ff84fdd62075 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -82,6 +82,28 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
   return N;
 }
 
+/// Return starting index of GC operand list.
+// FIXME: need a better place for this. Put it in StackMaps?
+static unsigned getStatepointGCArgStartIdx(MachineInstr *MI) {
+  assert(MI->getOpcode() == TargetOpcode::STATEPOINT &&
+         "STATEPOINT node expected");
+  unsigned OperIdx = StatepointOpers(MI).getNumDeoptArgsIdx();
+  unsigned NumDeopts = MI->getOperand(OperIdx).getImm();
+  // At this point stack references have not been lowered yet, so they
+  // take a single operand.
+  ++OperIdx;
+  while (NumDeopts--) {
+    MachineOperand &MO = MI->getOperand(OperIdx);
+    if (MO.isImm() && MO.getImm() == StackMaps::ConstantOp) {
+      ++OperIdx;
+      assert(MI->getOperand(OperIdx).isImm() &&
+             "Unexpected statepoint operand");
+    }
+    ++OperIdx;
+  }
+  return OperIdx;
+}
+
 /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
 /// implicit physical register output.
 void InstrEmitter::
@@ -200,6 +222,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
   bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
                              II.isVariadic() && II.variadicOpsAreDefs();
   unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs();
+  if (Node->getMachineOpcode() == TargetOpcode::STATEPOINT)
+    NumVRegs = NumResults;
   for (unsigned i = 0; i < NumVRegs; ++i) {
     // If the specific node value is only used by a CopyToReg and the dest reg
     // is a vreg in the same register class, use the CopyToReg'd destination
@@ -821,6 +845,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
       NumDefs = NumResults;
     }
     ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
+  } else if (Opc == TargetOpcode::STATEPOINT) {
+    NumDefs = NumResults;
   }
 
   unsigned NumImpUses = 0;
@@ -970,6 +996,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
     MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
 
+  // STATEPOINT is too 'dynamic' to have a meaningful machine description.
+  // We have to manually tie operands.
+  if (Opc == TargetOpcode::STATEPOINT && NumDefs > 0) {
+    assert(!HasPhysRegOuts && "STATEPOINT mishandled");
+    MachineInstr *MI = MIB;
+    unsigned Def = 0;
+    unsigned Use = getStatepointGCArgStartIdx(MI) + 1;
+    while (Def < NumDefs) {
+      if (MI->getOperand(Use).isReg())
+        MI->tieOperands(Def++, Use);
+      Use += 2;
+    }
+  }
+
   // Run post-isel target hook to adjust this instruction if needed.
   if (II.hasPostISelHook())
     TLI->AdjustInstrPostInstrSelection(*MIB, Node);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 17c68f2bf73b..76fb0340a7a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -125,8 +125,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
     PhysReg = Reg;
   } else if (Def->isMachineOpcode()) {
     const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
-    if (ResNo >= II.getNumDefs() &&
-        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
+    if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg))
       PhysReg = Reg;
   }
 

diff  --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 2cb57c1d1ccc..b8c4c73bccce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -67,6 +67,10 @@ cl::opt<bool> UseRegistersForDeoptValues(
     "use-registers-for-deopt-values", cl::Hidden, cl::init(false),
     cl::desc("Allow using registers for non pointer deopt args"));
 
+cl::opt<unsigned> MaxRegistersForGCPointers(
+    "max-registers-for-gc-values", cl::Hidden, cl::init(0),
+    cl::desc("Max number of VRegs allowed to pass GC pointer meta args in"));
+
 static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
                                  SelectionDAGBuilder &Builder, uint64_t Value) {
   SDLoc L = Builder.getCurSDLoc();
@@ -86,11 +90,13 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
   // FunctionLoweringInfo.  Also need to ensure used bits get cleared.
   AllocatedStackSlots.clear();
   AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
+  DerivedPtrMap.clear();
 }
 
 void StatepointLoweringState::clear() {
   Locations.clear();
   AllocatedStackSlots.clear();
+  DerivedPtrMap.clear();
   assert(PendingGCRelocateCalls.empty() &&
          "cleared before statepoint sequence completed");
 }
@@ -221,7 +227,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
   return None;
 }
 
-
 /// Return true if-and-only-if the given SDValue can be lowered as either a
 /// constant argument or a stack reference.  The key point is that the value
 /// doesn't need to be spilled or tracked as a vreg use.
@@ -242,7 +247,6 @@ static bool willLowerDirectly(SDValue Incoming) {
           Incoming.isUndef());
 }
 
-
 /// Try to find existing copies of the incoming values in stack slots used for
 /// statepoint spilling.  If we can find a spill slot for the incoming value,
 /// mark that slot as allocated, and reuse the same slot for this safepoint.
@@ -388,7 +392,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
                                  StoreMMO);
 
     MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
-    
+
     Builder.StatepointLowering.setLocation(Incoming, Loc);
   }
 
@@ -485,7 +489,9 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
 /// will be set to the last value spilled (if any were).
 static void
 lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
-                        SmallVectorImpl<MachineMemOperand*> &MemRefs,                                    SelectionDAGBuilder::StatepointLoweringInfo &SI,
+                        SmallVectorImpl<MachineMemOperand *> &MemRefs,
+                        DenseMap<SDValue, int> &LowerAsVReg,
+                        SelectionDAGBuilder::StatepointLoweringInfo &SI,
                         SelectionDAGBuilder &Builder) {
   // Lower the deopt and gc arguments for this statepoint.  Layout will be:
   // deopt argument length, deopt arguments.., gc arguments...
@@ -531,6 +537,37 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
   const bool LiveInDeopt =
     SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
 
+  // Decide which derived pointers will go on VRegs
+  const unsigned MaxTiedRegs = 15; // Max number of tied regs MI can have.
+  unsigned MaxVRegPtrs =
+      std::min(MaxTiedRegs, MaxRegistersForGCPointers.getValue());
+  // Use old spill scheme for cross-block relocates.
+  if (SI.StatepointInstr) {
+    const BasicBlock *BB = SI.StatepointInstr->getParent();
+    bool NonLocalReloc =
+        llvm::any_of(SI.GCRelocates, [BB](const GCRelocateInst *R) {
+          return R->getParent() != BB;
+        });
+    if (NonLocalReloc)
+      MaxVRegPtrs = 0;
+  }
+
+  LLVM_DEBUG(dbgs() << "Desiding how to lower GC Pointers:\n");
+  unsigned CurNumVRegs = 0;
+  for (const Value *P : SI.Ptrs) {
+    if (LowerAsVReg.size() == MaxVRegPtrs)
+      break;
+    SDValue PtrSD = Builder.getValue(P);
+    if (willLowerDirectly(PtrSD) || P->getType()->isVectorTy()) {
+      LLVM_DEBUG(dbgs() << "direct/spill "; PtrSD.dump(&Builder.DAG));
+      continue;
+    }
+    LLVM_DEBUG(dbgs() << "vreg "; PtrSD.dump(&Builder.DAG));
+    LowerAsVReg[PtrSD] = CurNumVRegs++;
+  }
+  LLVM_DEBUG(dbgs() << LowerAsVReg.size()
+                    << " derived pointers will go in vregs\n");
+
   auto isGCValue = [&](const Value *V) {
     auto *Ty = V->getType();
     if (!Ty->isPtrOrPtrVectorTy())
@@ -542,7 +579,9 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
   };
 
   auto requireSpillSlot = [&](const Value *V) {
-    return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
+    if (isGCValue(V))
+      return !LowerAsVReg.count(Builder.getValue(V));
+    return !(LiveInDeopt || UseRegistersForDeoptValues);
   };
 
   // Before we actually start lowering (and allocating spill slots for values),
@@ -554,9 +593,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
     if (requireSpillSlot(V))
       reservePreviousStackSlotForValue(V, Builder);
   }
+
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
-    reservePreviousStackSlotForValue(SI.Bases[i], Builder);
-    reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
+    SDValue SDV = Builder.getValue(SI.Bases[i]);
+    if (!LowerAsVReg.count(SDV))
+      reservePreviousStackSlotForValue(SI.Bases[i], Builder);
+    SDV = Builder.getValue(SI.Ptrs[i]);
+    if (!LowerAsVReg.count(SDV))
+      reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
   }
 
   // First, prefix the list with the number of unique values to be
@@ -567,6 +611,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
 
   // The vm state arguments are lowered in an opaque manner.  We do not know
   // what type of values are contained within.
+  LLVM_DEBUG(dbgs() << "Lowering deopt state\n");
   for (const Value *V : SI.DeoptState) {
     SDValue Incoming;
     // If this is a function argument at a static frame index, generate it as
@@ -578,6 +623,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
     }
     if (!Incoming.getNode())
       Incoming = Builder.getValue(V);
+    LLVM_DEBUG(dbgs() << "Value " << *V
+                      << " requireSpillSlot = " << requireSpillSlot(V) << "\n");
     lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs,
                                  Builder);
   }
@@ -588,14 +635,15 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
   // it's (lowered) derived pointer.  i.e
   // (base[0], ptr[0], base[1], ptr[1], ...)
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
-    const Value *Base = SI.Bases[i];
-    lowerIncomingStatepointValue(Builder.getValue(Base),
-                                 /*RequireSpillSlot*/ true, Ops, MemRefs,
+    bool RequireSpillSlot;
+    SDValue Base = Builder.getValue(SI.Bases[i]);
+    RequireSpillSlot = !LowerAsVReg.count(Base);
+    lowerIncomingStatepointValue(Base, RequireSpillSlot, Ops, MemRefs,
                                  Builder);
 
-    const Value *Ptr = SI.Ptrs[i];
-    lowerIncomingStatepointValue(Builder.getValue(Ptr),
-                                 /*RequireSpillSlot*/ true, Ops, MemRefs,
+    SDValue Derived = Builder.getValue(SI.Ptrs[i]);
+    RequireSpillSlot = !LowerAsVReg.count(Derived);
+    lowerIncomingStatepointValue(Derived, RequireSpillSlot, Ops, MemRefs,
                                  Builder);
   }
 
@@ -630,7 +678,9 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
     SDValue SDV = Builder.getValue(V);
     SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
 
-    if (Loc.getNode()) {
+    if (LowerAsVReg.count(SDV)) {
+      SpillMap[V] = None;
+    } else if (Loc.getNode()) {
       SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
     } else {
       // Record value as visited, but not spilled. This is case for allocas
@@ -665,6 +715,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
   assert(SI.Bases.size() == SI.Ptrs.size() &&
          SI.Ptrs.size() <= SI.GCRelocates.size());
 
+  LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
 #ifndef NDEBUG
   for (auto *Reloc : SI.GCRelocates)
     if (Reloc->getParent() == SI.StatepointInstr->getParent())
@@ -674,7 +725,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
   // Lower statepoint vmstate and gcstate arguments
   SmallVector<SDValue, 10> LoweredMetaArgs;
   SmallVector<MachineMemOperand*, 16> MemRefs;
-  lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
+  // Maps derived pointer SDValue to statepoint result of relocated pointer.
+  DenseMap<SDValue, int> LowerAsVReg;
+  lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, LowerAsVReg, SI, *this);
 
   // Now that we've emitted the spills, we need to update the root so that the
   // call sequence is ordered correctly.
@@ -788,14 +841,35 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
 
   // Compute return values.  Provide a glue output since we consume one as
   // input.  This allows someone else to chain off us as needed.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SmallVector<EVT, 8> NodeTys;
+  for (auto &Ptr : SI.Ptrs) {
+    SDValue SD = getValue(Ptr);
+    if (LowerAsVReg.count(SD)) {
+      NodeTys.push_back(SD.getValueType());
+    }
+  }
+  LLVM_DEBUG(dbgs() << "Statepoint has " << NodeTys.size() << " results\n");
+  assert(NodeTys.size() == LowerAsVReg.size() && "Inconsistent GC Ptr lowering");
+  NodeTys.push_back(MVT::Other);
+  NodeTys.push_back(MVT::Glue);
 
+  unsigned NumResults = NodeTys.size();
   MachineSDNode *StatepointMCNode =
     DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
   DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
 
   SDNode *SinkNode = StatepointMCNode;
 
+  // Fill mapping from derived pointer to statepoint result denoting its
+  // relocated value.
+  auto &DPtrMap = StatepointLowering.DerivedPtrMap;
+  for (const auto *Relocate : SI.GCRelocates) {
+    Value *Derived = Relocate->getDerivedPtr();
+    SDValue SD = getValue(Derived);
+    if (LowerAsVReg.count(SD))
+      DPtrMap[Derived] = SDValue(StatepointMCNode, LowerAsVReg[SD]);
+  }
+
   // Build the GC_TRANSITION_END node if necessary.
   //
   // See the comment above regarding GC_TRANSITION_START for the layout of
@@ -804,7 +878,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
     SmallVector<SDValue, 8> TEOps;
 
     // Add chain
-    TEOps.push_back(SDValue(StatepointMCNode, 0));
+    TEOps.push_back(SDValue(StatepointMCNode, NumResults - 2));
 
     // Add GC transition arguments
     for (const Value *V : SI.GCTransitionArgs) {
@@ -814,7 +888,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
     }
 
     // Add glue
-    TEOps.push_back(SDValue(StatepointMCNode, 1));
+    TEOps.push_back(SDValue(StatepointMCNode, NumResults - 1));
 
     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 
@@ -825,7 +899,12 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
   }
 
   // Replace original call
-  DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
+  // Call: ch,glue = CALL ...
+  // Statepoint: [gc relocates],ch,glue = STATEPOINT ...
+  unsigned NumSinkValues = SinkNode->getNumValues();
+  SDValue StatepointValues[2] = {SDValue(SinkNode, NumSinkValues - 2),
+                                 SDValue(SinkNode, NumSinkValues - 1)};
+  DAG.ReplaceAllUsesWith(CallNode, StatepointValues);
   // Remove original call node
   DAG.DeleteNode(CallNode);
 
@@ -927,7 +1006,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
     setValue(&I, ReturnValue);
     return;
   }
-  
+
  // Result value will be used in a different basic block so we need to export
  // it now.  Default exporting mechanism will not work here because statepoint
  // call has a different type than the actual call. It means that by default
@@ -1010,12 +1089,13 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
 }
 
 void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
+  const BasicBlock *StatepointBB = Relocate.getStatepoint()->getParent();
 #ifndef NDEBUG
   // Consistency check
   // We skip this check for relocates not in the same basic block as their
   // statepoint. It would be too expensive to preserve validation info through
  // different basic blocks.
-  if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
+  if (StatepointBB == Relocate.getParent())
     StatepointLowering.relocCallVisited(Relocate);
 
   auto *Ty = Relocate.getType()->getScalarType();
@@ -1033,6 +1113,16 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
     return;
   }
 
+  // Relocate is local to statepoint block and its pointer was assigned
+  // to VReg. Use corresponding statepoint result.
+  auto &DPtrMap = StatepointLowering.DerivedPtrMap;
+  auto It = DPtrMap.find(DerivedPtr);
+  if (It != DPtrMap.end()) {
+    setValue(&Relocate, It->second);
+    assert(Relocate.getParent() == StatepointBB && "unexpected DPtrMap entry");
+    return;
+  }
+
   auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
   auto SlotIt = SpillMap.find(DerivedPtr);
   assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");

diff  --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 634ef87f3840..d6c18379c5ad 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -103,6 +103,10 @@ class StatepointLoweringState {
     return AllocatedStackSlots.test(Offset);
   }
 
+  /// For each statepoint keep mapping from original derived pointer to
+  /// the statepoint node result defining its new value.
+  DenseMap<const Value *, SDValue> DerivedPtrMap;
+
 private:
   /// Maps pre-relocation value (gc pointer directly incoming into statepoint)
   /// into it's location (currently only stack slots)

diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 2c94c2c62e5f..db4fcf7494c7 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1041,9 +1041,19 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
   // Inherit previous memory operands.
   MIB.cloneMemRefs(*MI);
 
-  for (auto &MO : MI->operands()) {
+  for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+    MachineOperand &MO = MI->getOperand(i);
     if (!MO.isFI()) {
+      // Index of the Def operand this Use is tied to.
+      // Since Defs come before Uses, if a Use is tied, then the
+      // index of its Def must be smaller than the index of that Use.
+      // Also, Defs preserve their position in the new MI.
+      unsigned TiedTo = i;
+      if (MO.isReg() && MO.isTied())
+        TiedTo = MI->findTiedOperandIdx(i);
       MIB.add(MO);
+      if (TiedTo < i)
+        MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
       continue;
     }
 

diff  --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll
new file mode 100644
index 000000000000..bb86e9e1f1cf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll
@@ -0,0 +1,907 @@
+; This run is to demonstrate what MIR SSA looks like.
+; RUN: llc -max-registers-for-gc-values=4 -stop-after finalize-isel < %s | FileCheck --check-prefix=CHECK-VREG %s
+; This run is to demonstrate register allocator work.
+; RUN: llc -max-registers-for-gc-values=4 -stop-after virtregrewriter < %s | FileCheck --check-prefix=CHECK-PREG %s
+; This run is to demonstrate resulting assembly/stackmaps.
+; NOTE: When D81647 is landed this run line will need to be adjusted!
+; RUN: llc -max-registers-for-gc-values=4 < %s | FileCheck --check-prefix=CHECK-ASM %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare i1 @return_i1()
+declare void @func()
+declare void @consume(i32 addrspace(1)*)
+declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*)
+declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*)
+declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*)
+
+; test most simple relocate
+define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_relocate
+; CHECK-VREG:    %0:gr64 = COPY $rdi
+; CHECK-VREG:    %1:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, %0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al
+; CHECK-VREG:    %2:gr8 = COPY $al
+; CHECK-VREG:    $rdi = COPY %1
+; CHECK-VREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-PREG-LABEL: name:            test_relocate
+; CHECK-PREG:    renamable $rbx = COPY $rdi
+; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, killed renamable $rbx, renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al
+; CHECK-PREG:    renamable $bpl = COPY killed $al
+; CHECK-PREG:    $rdi = COPY killed renamable $rbx
+; CHECK-PREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-ASM-LABEL: test_relocate:
+; CHECK-ASM:       # %bb.0:
+; CHECK-ASM-NEXT:	pushq	%rbp
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	pushq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 24
+; CHECK-ASM-NEXT:	pushq	%rax
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 32
+; CHECK-ASM-NEXT:	.cfi_offset %rbx, -24
+; CHECK-ASM-NEXT:	.cfi_offset %rbp, -16
+; CHECK-ASM-NEXT:	movq	%rdi, %rbx
+; CHECK-ASM-NEXT:	callq	return_i1
+; CHECK-ASM-NEXT:  .Ltmp0:
+; CHECK-ASM-NEXT:	movl	%eax, %ebp
+; CHECK-ASM-NEXT:	movq	%rbx, %rdi
+; CHECK-ASM-NEXT:	callq	consume
+; CHECK-ASM-NEXT:	movl	%ebp, %eax
+; CHECK-ASM-NEXT:	addq	$8, %rsp
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 24
+; CHECK-ASM-NEXT:	popq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	popq	%rbp
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:	retq
+entry:
+  %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)]
+  %rel1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
+  %res1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+  call void @consume(i32 addrspace(1)* %rel1)
+  ret i1 %res1
+}
+; test pointer variables intermixed with pointer constants
+define void @test_mixed(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_mixed
+; CHECK-VREG:    %2:gr64 = COPY $rdx
+; CHECK-VREG:    %1:gr64 = COPY $rsi
+; CHECK-VREG:    %0:gr64 = COPY $rdi
+; CHECK-VREG:    %3:gr64, %4:gr64, %5:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, %2, %2(tied-def 0), 2, 0, 2, 0, %1, %1(tied-def 1), %0, %0(tied-def 2), csr_64
+; CHECK-VREG:    %6:gr32 = MOV32r0 implicit-def dead $eflags
+; CHECK-VREG:    %7:gr64 = SUBREG_TO_REG 0, killed %6, %subreg.sub_32bit
+; CHECK-VREG:    $rdi = COPY %5
+; CHECK-VREG:    $rsi = COPY %7
+; CHECK-VREG:    $rdx = COPY %4
+; CHECK-VREG:    $rcx = COPY %7
+; CHECK-VREG:    $r8 = COPY %3
+; CHECK-VREG:    CALL64pcrel32 @consume5, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-PREG-LABEL: name:            test_mixed
+; CHECK-PREG:    renamable $r14 = COPY $rdx
+; CHECK-PREG:    renamable $r15 = COPY $rsi
+; CHECK-PREG:    renamable $rbx = COPY $rdi
+; CHECK-PREG:    renamable $r14, renamable $r15, renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, killed renamable $r14, renamable $r14(tied-def 0), 2, 0, 2, 0, killed renamable $r15, renamable $r15(tied-def 1), killed renamable $rbx, renamable $rbx(tied-def 2), csr_64, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG:    $rdi = COPY killed renamable $rbx
+; CHECK-PREG:    dead $esi = MOV32r0 implicit-def dead $eflags, implicit-def $rsi
+; CHECK-PREG:    $rdx = COPY killed renamable $r15
+; CHECK-PREG:    dead $ecx = MOV32r0 implicit-def dead $eflags, implicit-def $rcx
+; CHECK-PREG:    $r8 = COPY killed renamable $r14
+; CHECK-PREG:    CALL64pcrel32 @consume5, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit killed $rcx, implicit killed $r8, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-ASM-LABEL: test_mixed:
+; CHECK-ASM:        # %bb.0:                                # %entry
+; CHECK-ASM-NEXT:	pushq	%r15
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	pushq	%r14
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 24
+; CHECK-ASM-NEXT:	pushq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 32
+; CHECK-ASM-NEXT:	.cfi_offset %rbx, -32
+; CHECK-ASM-NEXT:	.cfi_offset %r14, -24
+; CHECK-ASM-NEXT:	.cfi_offset %r15, -16
+; CHECK-ASM-NEXT:	movq	%rdx, %r14
+; CHECK-ASM-NEXT:	movq	%rsi, %r15
+; CHECK-ASM-NEXT:	movq	%rdi, %rbx
+; CHECK-ASM-NEXT:	callq	func
+; CHECK-ASM-NEXT:.Ltmp1:
+; CHECK-ASM-NEXT:	movq	%rbx, %rdi
+; CHECK-ASM-NEXT:	xorl	%esi, %esi
+; CHECK-ASM-NEXT:	movq	%r15, %rdx
+; CHECK-ASM-NEXT:	xorl	%ecx, %ecx
+; CHECK-ASM-NEXT:	movq	%r14, %r8
+; CHECK-ASM-NEXT:	callq	consume5
+; CHECK-ASM-NEXT:	popq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 24
+; CHECK-ASM-NEXT:	popq	%r14
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	popq	%r15
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:	retq
+entry:
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a, i32 addrspace(1)* null, i32 addrspace(1)* %b, i32 addrspace(1)* null, i32 addrspace(1)* %c)]
+  %rel1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
+  %rel2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 1, i32 1)
+  %rel3 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 2, i32 2)
+  %rel4 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 3, i32 3)
+  %rel5 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 4, i32 4)
+  call void @consume5(i32 addrspace(1)* %rel1, i32 addrspace(1)* %rel2, i32 addrspace(1)* %rel3, i32 addrspace(1)* %rel4, i32 addrspace(1)* %rel5)
+  ret void
+}
+
+; same as above, but for alloca
+define i32 addrspace(1)* @test_alloca(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_alloca
+; CHECK-VREG:    %0:gr64 = COPY $rdi
+; CHECK-VREG:    MOV64mr %stack.0.alloca, 1, $noreg, 0, $noreg, %0 :: (store 8 into %ir.alloca)
+; CHECK-VREG:    %1:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, %0, %0(tied-def 0), 0, %stack.0.alloca, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0.alloca)
+; CHECK-VREG:    %2:gr8 = COPY $al
+; CHECK-VREG:    %3:gr64 = MOV64rm %stack.0.alloca, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.alloca)
+; CHECK-VREG:    $rdi = COPY %1
+; CHECK-VREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-PREG-LABEL: name:            test_alloca
+; CHECK-PREG:    renamable $rbx = COPY $rdi
+; CHECK-PREG:    MOV64mr %stack.0.alloca, 1, $noreg, 0, $noreg, renamable $rbx :: (store 8 into %ir.alloca)
+; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, killed renamable $rbx, renamable $rbx(tied-def 0), 0, %stack.0.alloca, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $al :: (volatile load store 8 on %stack.0.alloca)
+; CHECK-PREG:    renamable $r14 = MOV64rm %stack.0.alloca, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.alloca)
+; CHECK-PREG:    $rdi = COPY killed renamable $rbx
+; CHECK-PREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-ASM-LABEL: test_alloca:
+; CHECK-ASM:       # %bb.0: # %entry
+; CHECK-ASM-NEXT:	pushq	%r14
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	pushq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 24
+; CHECK-ASM-NEXT:	pushq	%rax
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 32
+; CHECK-ASM-NEXT:	.cfi_offset %rbx, -24
+; CHECK-ASM-NEXT:	.cfi_offset %r14, -16
+; CHECK-ASM-NEXT:	movq	%rdi, %rbx
+; CHECK-ASM-NEXT:	movq	%rdi, (%rsp)
+; CHECK-ASM-NEXT:	callq	return_i1
+; CHECK-ASM-NEXT:  .Ltmp2:
+; CHECK-ASM-NEXT:	movq	(%rsp), %r14
+; CHECK-ASM-NEXT:	movq	%rbx, %rdi
+; CHECK-ASM-NEXT:	callq	consume
+; CHECK-ASM-NEXT:	movq	%r14, %rax
+; CHECK-ASM-NEXT:	addq	$8, %rsp
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 24
+; CHECK-ASM-NEXT:	popq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	popq	%r14
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:	retq
+entry:
+  %alloca = alloca i32 addrspace(1)*, align 8
+  store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca
+  %safepoint_token = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)** %alloca, i32 addrspace(1)* %ptr)]
+  %rel1 = load i32 addrspace(1)*, i32 addrspace(1)** %alloca
+  %rel2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 1, i32 1)
+  call void @consume(i32 addrspace(1)* %rel2)
+  ret i32 addrspace(1)* %rel1
+}
+
+; test base != derived
+define void @test_base_derived(i32 addrspace(1)* %base, i32 addrspace(1)* %derived) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_base_derived
+; CHECK-VREG:    %1:gr64 = COPY $rsi
+; CHECK-VREG:    %0:gr64 = COPY $rdi
+; CHECK-VREG:    MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0)
+; CHECK-VREG:    %2:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %1(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0)
+; CHECK-VREG:    $rdi = COPY %2
+; CHECK-VREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-PREG-LABEL: name:            test_base_derived
+; CHECK-PREG:    renamable $rbx = COPY $rsi
+; CHECK-PREG:    MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rdi :: (store 8 into %stack.0)
+; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, killed renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0)
+; CHECK-PREG:    $rdi = COPY killed renamable $rbx
+; CHECK-PREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-ASM-LABEL: test_base_derived:
+; CHECK-ASM:       # %bb.0:
+; CHECK-ASM-NEXT:	pushq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	subq	$16, %rsp
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 32
+; CHECK-ASM-NEXT:	.cfi_offset %rbx, -16
+; CHECK-ASM-NEXT:	movq	%rsi, %rbx
+; CHECK-ASM-NEXT:	movq	%rdi, 8(%rsp)
+; CHECK-ASM-NEXT:	callq	func
+; CHECK-ASM-NEXT:  .Ltmp3:
+; CHECK-ASM-NEXT:	movq	%rbx, %rdi
+; CHECK-ASM-NEXT:	callq	consume
+; CHECK-ASM-NEXT:	addq	$16, %rsp
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	popq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:	retq
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %base, i32 addrspace(1)* %derived)]
+  %reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 1)
+  call void @consume(i32 addrspace(1)* %reloc)
+  ret void
+}
+
+; deopt GC pointer not present in GC args must be spilled
+define void @test_deopt_gcpointer(i32 addrspace(1)* %a, i32 addrspace(1)* %b) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_deopt_gcpointer
+; CHECK-VREG:    %1:gr64 = COPY $rsi
+; CHECK-VREG:    %0:gr64 = COPY $rdi
+; CHECK-VREG:    MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0)
+; CHECK-VREG:    %2:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, %1, %1(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0)
+; CHECK-VREG:    $rdi = COPY %2
+; CHECK-VREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+; CHECK-VREG:    RET 0
+
+; CHECK-PREG-LABEL: name:            test_deopt_gcpointer
+; CHECK-PREG:    renamable $rbx = COPY $rsi
+; CHECK-PREG:    MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rdi :: (store 8 into %stack.0)
+; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, killed renamable $rbx, renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0)
+; CHECK-PREG:    $rdi = COPY killed renamable $rbx
+; CHECK-PREG:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-ASM-LABEL: test_deopt_gcpointer:
+; CHECK-ASM:       # %bb.0:
+; CHECK-ASM-NEXT:	pushq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	subq	$16, %rsp
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 32
+; CHECK-ASM-NEXT:	.cfi_offset %rbx, -16
+; CHECK-ASM-NEXT:	movq	%rsi, %rbx
+; CHECK-ASM-NEXT:	movq	%rdi, 8(%rsp)
+; CHECK-ASM-NEXT:	callq	func
+; CHECK-ASM-NEXT:  .Ltmp4:
+; CHECK-ASM-NEXT:	movq	%rbx, %rdi
+; CHECK-ASM-NEXT:	callq	consume
+; CHECK-ASM-NEXT:	addq	$16, %rsp
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	popq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:	retq
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 addrspace(1)* %a), "gc-live" (i32 addrspace(1)* %b)]
+  %rel = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
+  call void @consume(i32 addrspace(1)* %rel)
+  ret void
+}
+
+;; Two gc.relocates of the same input, should require only a single spill/fill
+define void @test_gcrelocate_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_gcrelocate_uniqueing
+; CHECK-VREG:    %0:gr64 = COPY $rdi
+; CHECK-VREG:    %1:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, %0, 2, 4278124286, %0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp
+; CHECK-VREG:    $rdi = COPY %1
+; CHECK-VREG:    $rsi = COPY %1
+; CHECK-VREG:    CALL64pcrel32 @consume2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-PREG-LABEL: name:            test_gcrelocate_uniqueing
+; CHECK-PREG:    renamable $rbx = COPY $rdi
+; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, renamable $rbx, renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG:    $rdi = COPY renamable $rbx
+; CHECK-PREG:    $rsi = COPY killed renamable $rbx
+; CHECK-PREG:    CALL64pcrel32 @consume2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit killed $rsi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-ASM-LABEL: test_gcrelocate_uniqueing:
+; CHECK-ASM:       # %bb.0:
+; CHECK-ASM-NEXT:	pushq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	.cfi_offset %rbx, -16
+; CHECK-ASM-NEXT:	movq	%rdi, %rbx
+; CHECK-ASM-NEXT:	callq	func
+; CHECK-ASM-NEXT: .Ltmp5:
+; CHECK-ASM-NEXT:	movq	%rbx, %rdi
+; CHECK-ASM-NEXT:	movq	%rbx, %rsi
+; CHECK-ASM-NEXT:	callq	consume2
+; CHECK-ASM-NEXT:	popq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:	retq
+  %tok = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 addrspace(1)* %ptr, i32 undef), "gc-live" (i32 addrspace(1)* %ptr, i32 addrspace(1)* %ptr)]
+  %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 0, i32 0)
+  %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 1, i32 1)
+  call void @consume2(i32 addrspace(1)* %a, i32 addrspace(1)* %b)
+  ret void
+}
+
+; Two gc.relocates of a bitcasted pointer should only require a single spill/fill
+define void @test_gcptr_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_gcptr_uniqueing
+; CHECK-VREG:    %0:gr64 = COPY $rdi
+; CHECK-VREG:    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+; CHECK-VREG:    %1:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, %0, 2, 4278124286, %0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp
+; CHECK-VREG:    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+; CHECK-VREG:    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+; CHECK-VREG:    $rdi = COPY %1
+; CHECK-VREG:    $rsi = COPY %1
+; CHECK-VREG:    CALL64pcrel32 @use1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-PREG-LABEL: name:            test_gcptr_uniqueing
+; CHECK-PREG:    renamable $rbx = COPY $rdi
+; CHECK-PREG:    renamable $rbx = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 2, killed renamable $rbx, 2, 4278124286, renamable $rbx, renamable $rbx(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp
+; CHECK-PREG:    $rdi = COPY renamable $rbx
+; CHECK-PREG:    $rsi = COPY killed renamable $rbx
+; CHECK-PREG:    CALL64pcrel32 @use1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit killed $rsi, implicit-def $rsp, implicit-def $ssp
+
+; CHECK-ASM-LABEL: test_gcptr_uniqueing:
+; CHECK-ASM:       # %bb.0:
+; CHECK-ASM-NEXT:	pushq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:	.cfi_offset %rbx, -16
+; CHECK-ASM-NEXT:	movq	%rdi, %rbx
+; CHECK-ASM-NEXT:	callq	func
+; CHECK-ASM-NEXT:  .Ltmp6:
+; CHECK-ASM-NEXT:	movq	%rbx, %rdi
+; CHECK-ASM-NEXT:	movq	%rbx, %rsi
+; CHECK-ASM-NEXT:	callq	use1
+; CHECK-ASM-NEXT:	popq	%rbx
+; CHECK-ASM-NEXT:	.cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:	retq
+  %ptr2 = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
+  %tok = tail call token (i64, i32, void ()*, i32, i32, ...)
+      @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 addrspace(1)* %ptr, i32 undef), "gc-live" (i32 addrspace(1)* %ptr, i8 addrspace(1)* %ptr2)]
+  %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 0, i32 0)
+  %b = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %tok, i32 1, i32 1)
+  call void @use1(i32 addrspace(1)* %a, i8 addrspace(1)* %b)
+  ret void
+}
+
+;
+; Cross-basicblock relocates are handled with spilling for now.
+; No need to check post-RA output
+define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_cross_bb
+; CHECK-VREG:  bb.0.entry:
+; CHECK-VREG:         %1:gr32 = COPY $esi
+; CHECK-VREG-NEXT:    %0:gr64 = COPY $rdi
+; CHECK-VREG-NEXT:    %3:gr8 = COPY %1.sub_8bit
+; CHECK-VREG-NEXT:    MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0)
+; CHECK-VREG-NEXT:    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+; CHECK-VREG-NEXT:    STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0)
+; CHECK-VREG-NEXT:    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+; CHECK-VREG-NEXT:    %4:gr8 = COPY $al
+; CHECK-VREG-NEXT:    %2:gr8 = COPY %4
+; CHECK-VREG-NEXT:    TEST8ri killed %3, 1, implicit-def $eflags
+; CHECK-VREG-NEXT:    JCC_1 %bb.2, 4, implicit $eflags
+; CHECK-VREG-NEXT:    JMP_1 %bb.1
+; CHECK-VREG:       bb.1.left:
+; CHECK-VREG-NEXT:    %6:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
+; CHECK-VREG-NEXT:    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+; CHECK-VREG-NEXT:    $rdi = COPY %6
+; CHECK-VREG-NEXT:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
+; CHECK-VREG-NEXT:    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+; CHECK-VREG-NEXT:    $al = COPY %2
+; CHECK-VREG-NEXT:    RET 0, $al
+; CHECK-VREG:       bb.2.right:
+; CHECK-VREG-NEXT:    %5:gr8 = MOV8ri 1
+; CHECK-VREG-NEXT:    $al = COPY %5
+; CHECK-VREG-NEXT:    RET 0, $al
+
+entry:
+  %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)]
+  br i1 %external_cond, label %left, label %right
+
+left:
+  %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
+  %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+  call void @consume(i32 addrspace(1)* %call1)
+  ret i1 %call2
+
+right:
+  ret i1 true
+}
+
+; No need to check post-regalloc output as it is the same
+define i1 @duplicate_reloc() gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            duplicate_reloc
+; CHECK-VREG:  bb.0.entry:
+; CHECK-VREG:    STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp
+; CHECK-VREG:    STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp
+; CHECK-VREG:    %0:gr8 = MOV8ri 1
+; CHECK-VREG:    $al = COPY %0
+; CHECK-VREG:    RET 0, $al
+
+; CHECK-ASM-LABEL: duplicate_reloc:
+; CHECK-ASM:       # %bb.0: # %entry
+; CHECK-ASM-NEXT:          pushq	%rax
+; CHECK-ASM-NEXT:          .cfi_def_cfa_offset 16
+; CHECK-ASM-NEXT:          callq	func
+; CHECK-ASM-NEXT:  .Ltmp8:
+; CHECK-ASM-NEXT:          callq	func
+; CHECK-ASM-NEXT:  .Ltmp9:
+; CHECK-ASM-NEXT:          movb	$1, %al
+; CHECK-ASM-NEXT:          popq	%rcx
+; CHECK-ASM-NEXT:          .cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:          retq
+entry:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* null, i32 addrspace(1)* null)]
+  %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
+  %derived = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 1)
+  %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %base, i32 addrspace(1)* %derived)]
+  %base_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2,  i32 0, i32 0)
+  %derived_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2,  i32 0, i32 1)
+  %cmp1 = icmp eq i32 addrspace(1)* %base_reloc, null
+  %cmp2 = icmp eq i32 addrspace(1)* %derived_reloc, null
+  %cmp = and i1 %cmp1, %cmp2
+  ret i1 %cmp
+}
+
+; Vectors cannot go in VRegs
+; No need to check post-regalloc output as it is lowered using old scheme
+define <2 x i8 addrspace(1)*> @test_vector(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_vector
+; CHECK-VREG:    %0:vr128 = COPY $xmm0
+; CHECK-VREG:    MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 16 into %stack.0)
+; CHECK-VREG:    STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 1, 16, %stack.0, 0, 1, 16, %stack.0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 16 on %stack.0)
+; CHECK-VREG:    %1:vr128 = MOVAPSrm %stack.0, 1, $noreg, 0, $noreg :: (load 16 from %stack.0)
+; CHECK-VREG:    $xmm0 = COPY %1
+; CHECK-VREG:    RET 0, $xmm0
+
+; CHECK-ASM-LABEL: test_vector:
+; CHECK-ASM:       # %bb.0: # %entry
+; CHECK-ASM-NEXT:          subq	$24, %rsp
+; CHECK-ASM-NEXT:          .cfi_def_cfa_offset 32
+; CHECK-ASM-NEXT:          movaps	%xmm0, (%rsp)
+; CHECK-ASM-NEXT:          callq	func
+; CHECK-ASM-NEXT:  .Ltmp10:
+; CHECK-ASM-NEXT:          movaps	(%rsp), %xmm0
+; CHECK-ASM-NEXT:          addq	$24, %rsp
+; CHECK-ASM-NEXT:          .cfi_def_cfa_offset 8
+; CHECK-ASM-NEXT:          retq
+entry:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x i8 addrspace(1)*> %obj)]
+  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 0, i32 0) ; (%obj, %obj)
+  ret <2 x i8 addrspace(1)*> %obj.relocated
+}
+
+
+; test limit on amount of vregs
+define void @test_limit(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c, i32 addrspace(1)* %d, i32 addrspace(1)*  %e) gc "statepoint-example" {
+; CHECK-VREG-LABEL: name:            test_limit
+; CHECK-VREG:    %4:gr64 = COPY $r8
+; CHECK-VREG:    %3:gr64 = COPY $rcx
+; CHECK-VREG:    %2:gr64 = COPY $rdx
+; CHECK-VREG:    %1:gr64 = COPY $rsi
+; CHECK-VREG:    %0:gr64 = COPY $rdi
+; CHECK-VREG:    MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0)
+; CHECK-VREG:    %5:gr64, %6:gr64, %7:gr64, %8:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, %4, %4(tied-def 0), %3, %3(tied-def 1), %2, %2(tied-def 2), %1, %1(tied-def 3), 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0)
+; CHECK-VREG:    %9:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
+; CHECK-VREG:    $rdi = COPY %9
+; CHECK-VREG:    $rsi = COPY %8
+; CHECK-VREG:    $rdx = COPY %7
+; CHECK-VREG:    $rcx = COPY %6
+; CHECK-VREG:    $r8 = COPY %5
+; CHECK-VREG:    CALL64pcrel32 @consume5, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp
+; CHECK-VREG:    RET 0
+entry:
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c, i32 addrspace(1)* %d, i32 addrspace(1)* %e)]
+  %rel1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
+  %rel2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 1, i32 1)
+  %rel3 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 2, i32 2)
+  %rel4 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 3, i32 3)
+  %rel5 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 4, i32 4)
+  call void @consume5(i32 addrspace(1)* %rel1, i32 addrspace(1)* %rel2, i32 addrspace(1)* %rel3, i32 addrspace(1)* %rel4, i32 addrspace(1)* %rel5)
+  ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
+declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
+declare i1 @llvm.experimental.gc.result.i1(token)
+
+; CHECK-ASM-LABEL: .section .llvm_stackmaps
+; CHECK-ASM-NEXT:  __LLVM_StackMaps:
+; Entry for test_relocate
+; CHECK-ASM:	        .quad	0
+; CHECK-ASM-NEXT:     	.long	.Ltmp0-test_relocate
+; CHECK-ASM-NEXT:	.short	0
+; Num locations
+; CHECK-ASM-NEXT:	.short	5
+; Location 1 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 2 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 3 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 4 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 5 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Entry for test_mixed
+; CHECK-ASM:     	.quad	0
+; CHECK-ASM-NEXT:	.long	.Ltmp1-test_mixed
+; CHECK-ASM-NEXT:	.short	0
+; Num locations
+; CHECK-ASM-NEXT:	.short	11
+; Location 1 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 2 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 3 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 4 Register $r14
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	14
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 5 Register $r14
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	14
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 6 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 7 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 8 Register $r15
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	15
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 9 Register $r15
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	15
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 10 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 11 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Entry for test_alloca
+; CHECK-ASM:     	.quad	0
+; CHECK-ASM-NEXT:	.long	.Ltmp2-test_alloca
+; CHECK-ASM-NEXT:	.short	0
+; Num locations
+; CHECK-ASM-NEXT:	.short	6
+; Location 1 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 2 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 3 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 4 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 5 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 6 Direct $rsp + 0
+; CHECK-ASM-NEXT:	.byte	2
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	7
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Entry for test_base_derived
+; CHECK-ASM:     	.quad	0
+; CHECK-ASM-NEXT:	.long	.Ltmp3-test_base_derived
+; CHECK-ASM-NEXT:	.short	0
+; Num locations
+; CHECK-ASM-NEXT:	.short	5
+; Location 1 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 2 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 3 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 4 Indirect $rsp + 8
+; CHECK-ASM-NEXT:	.byte	3
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	7
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	8
+; Location 5 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Entry for test_deopt_gcpointer
+; CHECK-ASM:     	.quad	0
+; CHECK-ASM-NEXT:	.long	.Ltmp4-test_deopt_gcpointer
+; CHECK-ASM-NEXT:	.short	0
+; Num locations
+; CHECK-ASM-NEXT:	.short	6
+; Location 1 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 2 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 3 Constant 1
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	1
+; Location 4 Indirect $rsp + 8
+; CHECK-ASM-NEXT:	.byte	3
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	7
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	8
+; Location 5 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 6 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Entry for test_gcrelocate_uniqueing
+; CHECK-ASM:     	.quad	0
+; CHECK-ASM-NEXT:	.long	.Ltmp5-test_gcrelocate_uniqueing
+; CHECK-ASM-NEXT:	.short	0
+; Num locations
+; CHECK-ASM-NEXT:	.short	7
+; Location 1 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 2 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 3 Constant 2
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	2
+; Location 4 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 5 Constant Index 0
+; CHECK-ASM-NEXT:	.byte	5
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 6 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 7 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Entry for test_gcptr_uniqueing
+; CHECK-ASM:     	.long	.Ltmp6-test_gcptr_uniqueing
+; CHECK-ASM-NEXT:	.short	0
+; Num locations
+; CHECK-ASM-NEXT:	.short	7
+; Location 1 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 2 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 3 Constant 2
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	2
+; Location 4 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 5 Constant Index 0
+; CHECK-ASM-NEXT:	.byte	5
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 6 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 7 Register $rbx
+; CHECK-ASM-NEXT:	.byte	1
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	3
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Entry for test_cross_bb
+; CHECK-ASM:     	.quad	0
+; CHECK-ASM-NEXT:	.long	.Ltmp7-test_cross_bb
+; CHECK-ASM-NEXT:	.short	0
+; Num locations
+; CHECK-ASM-NEXT:	.short	5
+; Location 1 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 2 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 3 Constant 0
+; CHECK-ASM-NEXT:	.byte	4
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 4 Indirect $rsp + 0
+; CHECK-ASM-NEXT:	.byte	3
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	7
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0
+; Location 5 Indirect $rsp + 0
+; CHECK-ASM-NEXT:	.byte	3
+; CHECK-ASM-NEXT:	.byte	0
+; CHECK-ASM-NEXT:	.short	8
+; CHECK-ASM-NEXT:	.short	7
+; CHECK-ASM-NEXT:	.short	0
+; CHECK-ASM-NEXT:	.long	0


        


More information about the llvm-commits mailing list