[llvm] 44f0d7f - Revert "[Codegen/Statepoint] Allow usage of registers for non gc deopt values."

Thu Apr 9 04:26:03 PDT 2020

Author: Serguei Katkov
Date: 2020-04-09T18:24:47+07:00
New Revision: 44f0d7f13647b5e155ab39295c78afd7486a5875

URL: https://github.com/llvm/llvm-project/commit/44f0d7f13647b5e155ab39295c78afd7486a5875
DIFF: https://github.com/llvm/llvm-project/commit/44f0d7f13647b5e155ab39295c78afd7486a5875.diff

LOG: Revert "[Codegen/Statepoint] Allow usage of registers for non gc deopt values."

This reverts commit a0275705bb5aa938119c3e7c8bc957a823450b17.

It causes buildbot failures building LLVM with BUILD_SHARED_LIBS due to a linker error.

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/Passes.h
    llvm/include/llvm/InitializePasses.h
    llvm/lib/CodeGen/CMakeLists.txt
    llvm/lib/CodeGen/CodeGen.cpp
    llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
    llvm/lib/CodeGen/TargetPassConfig.cpp

Removed: 
    llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
    llvm/test/CodeGen/X86/statepoint-regs.ll


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index b255447e2a98..7627baa2f059 100644

--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -475,10 +475,6 @@ namespace llvm {
 
   /// Creates MIR Debugify pass. \see MachineDebugify.cpp
   ModulePass *createDebugifyMachineModulePass();
-
-  /// The pass fixups statepoint machine instruction to replace usage of
-  /// caller saved registers with stack slots.
-  extern char &FixupStatepointCallerSavedID;
 } // End llvm namespace
 
 #endif

diff  --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 34a21541556a..a71079fdc2b5 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -152,7 +152,6 @@ void initializeExternalAAWrapperPassPass(PassRegistry&);
 void initializeFEntryInserterPass(PassRegistry&);
 void initializeFinalizeISelPass(PassRegistry&);
 void initializeFinalizeMachineBundlesPass(PassRegistry&);
-void initializeFixupStatepointCallerSavedPass(PassRegistry&);
 void initializeFlattenCFGPassPass(PassRegistry&);
 void initializeFloat2IntLegacyPassPass(PassRegistry&);
 void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);

diff  --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 0bee58051b2e..38187897fb66 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -30,7 +30,6 @@ add_llvm_component_library(LLVMCodeGen
   FaultMaps.cpp
   FEntryInserter.cpp
   FinalizeISel.cpp
-  FixupStatepointCallerSaved.cpp
   FuncletLayout.cpp
   GCMetadata.cpp
   GCMetadataPrinter.cpp

diff  --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 6237b8dd77e0..f3b596a8f2bb 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -39,7 +39,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeFEntryInserterPass(Registry);
   initializeFinalizeISelPass(Registry);
   initializeFinalizeMachineBundlesPass(Registry);
-  initializeFixupStatepointCallerSavedPass(Registry);
   initializeFuncletLayoutPass(Registry);
   initializeGCMachineCodeAnalysisPass(Registry);
   initializeGCModuleInfoPass(Registry);

diff  --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
deleted file mode 100644
index 29478e9eeec9..000000000000
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers  ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Statepoint instruction in deopt parameters contains values which are
-/// meaningful to the runtime and should be able to be read at the moment the
-/// call returns. So we can say that we need to encode the fact that these
-/// values are "late read" by runtime. If we could express this notion for
-/// register allocator it would produce the right form for us.
-/// The need to fixup (i.e this pass) is specifically handling the fact that
-/// we cannot describe such a late read for the register allocator.
-/// Register allocator may put the value on a register clobbered by the call.
-/// This pass forces the spill of such registers and replaces corresponding
-/// statepoint operands to added spill slots.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/Statepoint.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "fixup-statepoint-caller-saved"
-STATISTIC(NumSpilledRegisters, "Number of spilled register");
-STATISTIC(NumSpillSlotsAllocated, "Number of spill slots allocated");
-STATISTIC(NumSpillSlotsExtended, "Number of spill slots extended");
-
-static cl::opt<bool> FixupSCSExtendSlotSize(
-    "fixup-scs-extend-slot-size", cl::Hidden, cl::init(false),
-    cl::desc("Allow spill in spill slot of greater size than register size"),
-    cl::Hidden);
-
-namespace {
-
-class FixupStatepointCallerSaved : public MachineFunctionPass {
-public:
-  static char ID;
-
-  FixupStatepointCallerSaved() : MachineFunctionPass(ID) {
-    initializeFixupStatepointCallerSavedPass(*PassRegistry::getPassRegistry());
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  StringRef getPassName() const override {
-    return "Fixup Statepoint Caller Saved";
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-};
-} // End anonymous namespace.
-
-char FixupStatepointCallerSaved::ID = 0;
-char &llvm::FixupStatepointCallerSavedID = FixupStatepointCallerSaved::ID;
-
-INITIALIZE_PASS_BEGIN(FixupStatepointCallerSaved, DEBUG_TYPE,
-                      "Fixup Statepoint Caller Saved", false, false)
-INITIALIZE_PASS_END(FixupStatepointCallerSaved, DEBUG_TYPE,
-                    "Fixup Statepoint Caller Saved", false, false)
-
-// Utility function to get size of the register.
-static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) {
-  const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
-  return TRI.getSpillSize(*RC);
-}
-
-// Cache used frame indexes during statepoint re-write to re-use them in
-// processing next statepoint instruction.
-// Two strategies. One is to preserve the size of spill slot while another one
-// extends the size of spill slots to reduce the number of them, causing
-// the less total frame size. But unspill will have "implicit" any extend.
-class FrameIndexesCache {
-private:
-  struct FrameIndexesPerSize {
-    // List of used frame indexes during processing previous statepoints.
-    SmallVector<int, 8> Slots;
-    // Current index of un-used yet frame index.
-    unsigned Index = 0;
-  };
-  MachineFrameInfo &MFI;
-  const TargetRegisterInfo &TRI;
-  // Map size to list of frame indexes of this size. If the mode is
-  // FixupSCSExtendSlotSize then the key 0 is used to keep all frame indexes.
-  // If the size of required spill slot is greater than in a cache then the
-  // size will be increased.
-  DenseMap<unsigned, FrameIndexesPerSize> Cache;
-
-public:
-  FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI)
-      : MFI(MFI), TRI(TRI) {}
-  // Reset the current state of used frame indexes. After invocation of
-  // this function all frame indexes are available for allocation.
-  void reset() {
-    for (auto &It : Cache)
-      It.second.Index = 0;
-  }
-  // Get frame index to spill the register.
-  int getFrameIndex(Register Reg) {
-    unsigned Size = getRegisterSize(TRI, Reg);
-    // In FixupSCSExtendSlotSize mode the bucket with 0 index is used
-    // for all sizes.
-    unsigned Bucket = FixupSCSExtendSlotSize ? 0 : Size;
-    FrameIndexesPerSize &Line = Cache[Bucket];
-    if (Line.Index < Line.Slots.size()) {
-      int FI = Line.Slots[Line.Index++];
-      // If all sizes are kept together we probably need to extend the
-      // spill slot size.
-      if (MFI.getObjectSize(FI) < Size) {
-        MFI.setObjectSize(FI, Size);
-        MFI.setObjectAlignment(FI, Align(Size));
-        NumSpillSlotsExtended++;
-      }
-      return FI;
-    }
-    int FI = MFI.CreateSpillStackObject(Size, Size);
-    NumSpillSlotsAllocated++;
-    Line.Slots.push_back(FI);
-    ++Line.Index;
-    return FI;
-  }
-  // Sort all registers to spill in descendent order. In the
-  // FixupSCSExtendSlotSize mode it will minimize the total frame size.
-  // In non FixupSCSExtendSlotSize mode we can skip this step.
-  void sortRegisters(SmallVectorImpl<Register> &Regs) {
-    if (!FixupSCSExtendSlotSize)
-      return;
-    llvm::sort(Regs.begin(), Regs.end(), [&](Register &A, Register &B) {
-      return getRegisterSize(TRI, A) > getRegisterSize(TRI, B);
-    });
-  }
-};
-
-// Describes the state of the current processing statepoint instruction.
-class StatepointState {
-private:
-  // statepoint instruction.
-  MachineInstr &MI;
-  MachineFunction &MF;
-  const TargetRegisterInfo &TRI;
-  const TargetInstrInfo &TII;
-  MachineFrameInfo &MFI;
-  // Mask with callee saved registers.
-  const uint32_t *Mask;
-  // Cache of frame indexes used on previous instruction processing.
-  FrameIndexesCache &CacheFI;
-  // Operands with physical registers requiring spilling.
-  SmallVector<unsigned, 8> OpsToSpill;
-  // Set of register to spill.
-  SmallVector<Register, 8> RegsToSpill;
-  // Map Register to Frame Slot index.
-  DenseMap<Register, int> RegToSlotIdx;
-
-public:
-  StatepointState(MachineInstr &MI, const uint32_t *Mask,
-                  FrameIndexesCache &CacheFI)
-      : MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()),
-        TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()),
-        Mask(Mask), CacheFI(CacheFI) {}
-  // Return true if register is callee saved.
-  bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; }
-  // Iterates over statepoint meta args to find caller saver registers.
-  // Also cache the size of found registers.
-  // Returns true if caller save registers found.
-  bool findRegistersToSpill() {
-    SmallSet<Register, 8> VisitedRegs;
-    for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
-                  EndIdx = MI.getNumOperands();
-         Idx < EndIdx; ++Idx) {
-      MachineOperand &MO = MI.getOperand(Idx);
-      if (!MO.isReg() || MO.isImplicit())
-        continue;
-      Register Reg = MO.getReg();
-      assert(Reg.isPhysical() && "Only physical regs are expected");
-      if (isCalleeSaved(Reg))
-        continue;
-      if (VisitedRegs.insert(Reg).second)
-        RegsToSpill.push_back(Reg);
-      OpsToSpill.push_back(Idx);
-    }
-    CacheFI.sortRegisters(RegsToSpill);
-    return !RegsToSpill.empty();
-  }
-  // Spill all caller saved registers right before statepoint instruction.
-  // Remember frame index where register is spilled.
-  void spillRegisters() {
-    for (Register Reg : RegsToSpill) {
-      int FI = CacheFI.getFrameIndex(Reg);
-      const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
-      TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI,
-                              RC, &TRI);
-      NumSpilledRegisters++;
-      RegToSlotIdx[Reg] = FI;
-    }
-  }
-  // Re-write statepoint machine instruction to replace caller saved operands
-  // with indirect memory location (frame index).
-  void rewriteStatepoint() {
-    MachineInstr *NewMI =
-        MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
-    MachineInstrBuilder MIB(MF, NewMI);
-
-    // Add End marker.
-    OpsToSpill.push_back(MI.getNumOperands());
-    unsigned CurOpIdx = 0;
-
-    for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
-      MachineOperand &MO = MI.getOperand(I);
-      if (I == OpsToSpill[CurOpIdx]) {
-        int FI = RegToSlotIdx[MO.getReg()];
-        MIB.addImm(StackMaps::IndirectMemRefOp);
-        MIB.addImm(getRegisterSize(TRI, MO.getReg()));
-        assert(MO.isReg() && "Should be register");
-        assert(MO.getReg().isPhysical() && "Should be physical register");
-        MIB.addFrameIndex(FI);
-        MIB.addImm(0);
-        ++CurOpIdx;
-      } else
-        MIB.add(MO);
-    }
-    assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed");
-    // Add mem operands.
-    NewMI->setMemRefs(MF, MI.memoperands());
-    for (auto It : RegToSlotIdx) {
-      int FrameIndex = It.second;
-      auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
-      auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
-                                          getRegisterSize(TRI, It.first),
-                                          MFI.getObjectAlign(FrameIndex));
-      NewMI->addMemOperand(MF, MMO);
-    }
-    // Insert new statepoint and erase old one.
-    MI.getParent()->insert(MI, NewMI);
-    MI.eraseFromParent();
-  }
-};
-
-class StatepointProcessor {
-private:
-  MachineFunction &MF;
-  const TargetRegisterInfo &TRI;
-  FrameIndexesCache CacheFI;
-
-public:
-  StatepointProcessor(MachineFunction &MF)
-      : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()),
-        CacheFI(MF.getFrameInfo(), TRI) {}
-
-  bool process(MachineInstr &MI) {
-    unsigned VarIdx = StatepointOpers(&MI).getVarIdx();
-    uint64_t Flags =
-        MI.getOperand(VarIdx + StatepointOpers::FlagsOffset).getImm();
-    // Do nothing for LiveIn, it supports all registers.
-    if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn)
-      return false;
-    CallingConv::ID CC =
-        MI.getOperand(VarIdx + StatepointOpers::CCOffset).getImm();
-    const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
-    CacheFI.reset();
-    StatepointState SS(MI, Mask, CacheFI);
-
-    if (!SS.findRegistersToSpill())
-      return false;
-
-    SS.spillRegisters();
-    SS.rewriteStatepoint();
-    return true;
-  }
-};
-
-bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()))
-    return false;
-
-  const Function &F = MF.getFunction();
-  if (!F.hasGC())
-    return false;
-
-  SmallVector<MachineInstr *, 16> Statepoints;
-  for (MachineBasicBlock &BB : MF)
-    for (MachineInstr &I : BB)
-      if (I.getOpcode() == TargetOpcode::STATEPOINT)
-        Statepoints.push_back(&I);
-
-  if (Statepoints.empty())
-    return false;
-
-  bool Changed = false;
-  StatepointProcessor SPP(MF);
-  for (MachineInstr *I : Statepoints)
-    Changed |= SPP.process(*I);
-  return Changed;
-}

diff  --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5c26b11397b9..ffe35637af28 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -61,10 +61,6 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
 STATISTIC(StatepointMaxSlotsRequired,
           "Maximum number of stack slots required for a singe statepoint");
 
-cl::opt<bool> UseRegistersForDeoptValues(
-    "use-registers-for-deopt-values", cl::Hidden, cl::init(false),
-    cl::desc("Allow using registers for non pointer deopt args"));
-
 static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
                                  SelectionDAGBuilder &Builder, uint64_t Value) {
   SDLoc L = Builder.getCurSDLoc();
@@ -413,8 +409,7 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
     // end up folding some of these into stack references, but they'll be
     // handled by the register allocator.  Note that we do not have the notion
     // of a late use so these values might be placed in registers which are
-    // clobbered by the call.  This is fine for live-in. For live-through
-    // fix-up pass should be executed to force spilling of such registers.
+    // clobbered by the call.  This is fine for live-in.
     Ops.push_back(Incoming);
   } else {
     // Otherwise, locate a spill slot and explicitly spill it so it
@@ -499,7 +494,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
   };
 
   auto requireSpillSlot = [&](const Value *V) {
-    return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
+    return !LiveInDeopt || isGCValue(V);
   };
 
   // Before we actually start lowering (and allocating spill slots for values),

diff  --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 7463caf2fd00..a351efc6659d 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -204,8 +204,6 @@ static cl::opt<std::string>
                   cl::desc("Stop compilation before a specific pass"),
                   cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
 
-extern cl::opt<bool> UseRegistersForDeoptValues;
-
 /// Allow standard passes to be disabled by command line options. This supports
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
@@ -913,9 +911,6 @@ void TargetPassConfig::addMachinePasses() {
   // Run post-ra passes.
   addPostRegAlloc();
 
-  if (UseRegistersForDeoptValues)
-    addPass(&FixupStatepointCallerSavedID);
-
   // Insert prolog/epilog code.  Eliminate abstract frame index references...
   if (getOptLevel() != CodeGenOpt::None) {
     addPass(&PostRAMachineSinkingID);

diff  --git a/llvm/test/CodeGen/X86/statepoint-regs.ll b/llvm/test/CodeGen/X86/statepoint-regs.ll
deleted file mode 100644
index e3333bd51ab7..000000000000
--- a/llvm/test/CodeGen/X86/statepoint-regs.ll
+++ /dev/null
@@ -1,679 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true < %s | FileCheck %s
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.11.0"
-
-declare void @bar() #0
-declare void @baz()
-
-; Spill caller saved register for %a.
-define void @test1(i32 %a) gc "statepoint-example" {
-; CHECK-LABEL: test1:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    callq _bar ## 4-byte Folded Reload
-; CHECK-NEXT:  Ltmp0:
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
-entry:
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a)
-  ret void
-}
-
-; Callee save registers are ok.
-define void @test2(i32 %a, i32 %b) gc "statepoint-example" {
-; CHECK-LABEL: test2:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset %rbx, -24
-; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movl %esi, %ebx
-; CHECK-NEXT:    movl %edi, %ebp
-; CHECK-NEXT:    callq _bar
-; CHECK-NEXT:  Ltmp1:
-; CHECK-NEXT:    callq _bar
-; CHECK-NEXT:  Ltmp2:
-; CHECK-NEXT:    addq $8, %rsp
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:    retq
-entry:
-  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 2, i32 %a, i32 %b)
-  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 2, i32 %b, i32 %a)
-  ret void
-}
-
-; Arguments in caller saved registers, so they must be spilled.
-define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) gc "statepoint-example" {
-; CHECK-LABEL: test3:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %r9d, (%rsp) ## 4-byte Spill
-; CHECK-NEXT:    callq _bar ## 24-byte Folded Reload
-; CHECK-NEXT:  Ltmp3:
-; CHECK-NEXT:    addq $24, %rsp
-; CHECK-NEXT:    retq
-entry:
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 9, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i)
-  ret void
-}
-
-; This case just confirms that we don't crash when given more live values
-; than registers.  This is a case where we *have* to use a stack slot.  This
-; also ends up being a good test of whether we can fold loads from immutable
-; stack slots into the statepoint.
-define void @test4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
-; CHECK-LABEL: test4:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %r9d, (%rsp) ## 4-byte Spill
-; CHECK-NEXT:    callq _bar ## 24-byte Folded Reload
-; CHECK-NEXT:  Ltmp4:
-; CHECK-NEXT:    addq $24, %rsp
-; CHECK-NEXT:    retq
-entry:
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
-  ret void
-}
-
-; A gc-value must be spilled even if it is also a deopt value.
-define  i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" {
-; CHECK-LABEL: test5:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    subq $16, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movl %edi, %ebx
-; CHECK-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    callq _bar
-; CHECK-NEXT:  Ltmp5:
-; CHECK-NEXT:    callq _bar
-; CHECK-NEXT:  Ltmp6:
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    addq $16, %rsp
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    retq
-entry:
-  %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p)
-  %p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token,  i32 9, i32 9)
-  %token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p2, i32 addrspace(1)* %p2)
-  %p3 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token2,  i32 9, i32 9)
-  ret i32 addrspace(1)* %p3
-}
-
-; Callee saved are ok again.
-define void @test6(i32 %a) gc "statepoint-example" {
-; CHECK-LABEL: test6:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movl %edi, %ebx
-; CHECK-NEXT:    callq _baz
-; CHECK-NEXT:  Ltmp7:
-; CHECK-NEXT:    callq _bar
-; CHECK-NEXT:  Ltmp8:
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    retq
-entry:
-  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 %a)
-  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a)
-  ret void
-}
-
-; Many deopt values.
-define void @test7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
-; The code for this is terrible, check simply for correctness for the moment
-; CHECK-LABEL: test7:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    pushq %r13
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
-; CHECK-NEXT:    pushq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 56
-; CHECK-NEXT:    subq $168, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 224
-; CHECK-NEXT:    .cfi_offset %rbx, -56
-; CHECK-NEXT:    .cfi_offset %r12, -48
-; CHECK-NEXT:    .cfi_offset %r13, -40
-; CHECK-NEXT:    .cfi_offset %r14, -32
-; CHECK-NEXT:    .cfi_offset %r15, -24
-; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl %edi, %edi
-; CHECK-NEXT:    movl %esi, %esi
-; CHECK-NEXT:    movl %edx, %edx
-; CHECK-NEXT:    movl %ecx, %ecx
-; CHECK-NEXT:    movl %r8d, %r8d
-; CHECK-NEXT:    movl %r9d, %r9d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ebp
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r13d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r12d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r15d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    callq _bar ## 160-byte Folded Reload
-; CHECK-NEXT:  Ltmp9:
-; CHECK-NEXT:    addq $168, %rsp
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    popq %r12
-; CHECK-NEXT:    popq %r13
-; CHECK-NEXT:    popq %r14
-; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:    retq
-entry:
-  %a64 = zext i32 %a to i64
-  %b64 = zext i32 %b to i64
-  %c64 = zext i32 %c to i64
-  %d64 = zext i32 %d to i64
-  %e64 = zext i32 %e to i64
-  %f64 = zext i32 %f to i64
-  %g64 = zext i32 %g to i64
-  %h64 = zext i32 %h to i64
-  %i64 = zext i32 %i to i64
-  %j64 = zext i32 %j to i64
-  %k64 = zext i32 %k to i64
-  %l64 = zext i32 %l to i64
-  %m64 = zext i32 %m to i64
-  %n64 = zext i32 %n to i64
-  %o64 = zext i32 %o to i64
-  %p64 = zext i32 %p to i64
-  %q64 = zext i32 %q to i64
-  %r64 = zext i32 %r to i64
-  %s64 = zext i32 %s to i64
-  %t64 = zext i32 %t to i64
-  %u64 = zext i32 %u to i64
-  %v64 = zext i32 %v to i64
-  %w64 = zext i32 %w to i64
-  %x64 = zext i32 %x to i64
-  %y64 = zext i32 %y to i64
-  %z64 = zext i32 %z to i64
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
-  ret void
-}
-
-; a variant of test7 with mixed types chosen to exercise register aliases
-define void @test8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
-; The code for this is terrible, check simply for correctness for the moment
-; CHECK-LABEL: test8:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    pushq %r13
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
-; CHECK-NEXT:    pushq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 56
-; CHECK-NEXT:    subq $104, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 160
-; CHECK-NEXT:    .cfi_offset %rbx, -56
-; CHECK-NEXT:    .cfi_offset %r12, -48
-; CHECK-NEXT:    .cfi_offset %r13, -40
-; CHECK-NEXT:    .cfi_offset %r14, -32
-; CHECK-NEXT:    .cfi_offset %r15, -24
-; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movl %r9d, %r10d
-; CHECK-NEXT:    movl %r8d, %r9d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ebp
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r13d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r12d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r15d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %r8d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
-; CHECK-NEXT:    movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
-; CHECK-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
-; CHECK-NEXT:    movb %cl, (%rsp) ## 1-byte Spill
-; CHECK-NEXT:    movw %r9w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; CHECK-NEXT:    movw %r10w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    callq _bar ## 104-byte Folded Reload
-; CHECK-NEXT:  Ltmp10:
-; CHECK-NEXT:    addq $104, %rsp
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    popq %r12
-; CHECK-NEXT:    popq %r13
-; CHECK-NEXT:    popq %r14
-; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:    retq
-entry:
-  %a8 = trunc i32 %a to i8
-  %b8 = trunc i32 %b to i8
-  %c8 = trunc i32 %c to i8
-  %d8 = trunc i32 %d to i8
-  %e16 = trunc i32 %e to i16
-  %f16 = trunc i32 %f to i16
-  %g16 = trunc i32 %g to i16
-  %h16 = trunc i32 %h to i16
-  %i64 = zext i32 %i to i64
-  %j64 = zext i32 %j to i64
-  %k64 = zext i32 %k to i64
-  %l64 = zext i32 %l to i64
-  %m64 = zext i32 %m to i64
-  %n64 = zext i32 %n to i64
-  %o64 = zext i32 %o to i64
-  %p64 = zext i32 %p to i64
-  %q64 = zext i32 %q to i64
-  %r64 = zext i32 %r to i64
-  %s64 = zext i32 %s to i64
-  %t64 = zext i32 %t to i64
-  %u64 = zext i32 %u to i64
-  %v64 = zext i32 %v to i64
-  %w64 = zext i32 %w to i64
-  %x64 = zext i32 %x to i64
-  %y64 = zext i32 %y to i64
-  %z64 = zext i32 %z to i64
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i8 %a8, i8 %b8, i8 %c8, i8 %d8, i16 %e16, i16 %f16, i16 %g16, i16 %h16, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
-  ret void
-}
-
-; Test perfect forwarding of argument registers and stack slots to the
-; deopt bundle uses
-define void @test9(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
-; CHECK-LABEL: test9:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT:    movl %r9d, (%rsp) ## 4-byte Spill
-; CHECK-NEXT:    callq _bar ## 24-byte Folded Reload
-; CHECK-NEXT:  Ltmp11:
-; CHECK-NEXT:    addq $24, %rsp
-; CHECK-NEXT:    retq
-
-entry:
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
-  ret void
-}
-
-; Test enough folding of argument slots when we have one call which clobbers
-; registers before a second which needs them - i.e. we must do something with
-; arguments originally passed in registers
-define void @test10(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
-; FIXME (minor): It would be better to just spill (and fold reload) for
-; argument registers then spill and fill all the CSRs.
-; CHECK-LABEL: test10:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    pushq %r13
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
-; CHECK-NEXT:    pushq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 56
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset %rbx, -56
-; CHECK-NEXT:    .cfi_offset %r12, -48
-; CHECK-NEXT:    .cfi_offset %r13, -40
-; CHECK-NEXT:    .cfi_offset %r14, -32
-; CHECK-NEXT:    .cfi_offset %r15, -24
-; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movl %r9d, %r15d
-; CHECK-NEXT:    movl %r8d, %r14d
-; CHECK-NEXT:    movl %ecx, %r12d
-; CHECK-NEXT:    movl %edx, %r13d
-; CHECK-NEXT:    movl %esi, %ebx
-; CHECK-NEXT:    movl %edi, %ebp
-; CHECK-NEXT:    callq _bar
-; CHECK-NEXT:  Ltmp12:
-; CHECK-NEXT:    callq _bar
-; CHECK-NEXT:  Ltmp13:
-; CHECK-NEXT:    addq $8, %rsp
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    popq %r12
-; CHECK-NEXT:    popq %r13
-; CHECK-NEXT:    popq %r14
-; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:    retq
-
-entry:
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
-  %statepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
-  ret void
-}
-
-; Check that we can remat some uses of a def despite not remating before the
-; statepoint user.
-define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
-; FIXME: The codegen for this is correct, but horrible.  Lots of room for
-; improvement if we so desire.
-; CHECK-LABEL: test11:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    pushq %r13
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
-; CHECK-NEXT:    pushq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 56
-; CHECK-NEXT:    subq $168, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 224
-; CHECK-NEXT:    .cfi_offset %rbx, -56
-; CHECK-NEXT:    .cfi_offset %r12, -48
-; CHECK-NEXT:    .cfi_offset %r13, -40
-; CHECK-NEXT:    .cfi_offset %r14, -32
-; CHECK-NEXT:    .cfi_offset %r15, -24
-; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl %edi, %ebx
-; CHECK-NEXT:    movl %esi, %r15d
-; CHECK-NEXT:    movl %edx, %r12d
-; CHECK-NEXT:    movl %ecx, %r13d
-; CHECK-NEXT:    movl %r8d, %ebp
-; CHECK-NEXT:    movl %r9d, %r14d
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    callq _bar ## 160-byte Folded Reload
-; CHECK-NEXT:  Ltmp14:
-; CHECK-NEXT:    addq %r15, %rbx
-; CHECK-NEXT:    addq %r12, %rbx
-; CHECK-NEXT:    addq %r13, %rbx
-; CHECK-NEXT:    addq %rbp, %rbx
-; CHECK-NEXT:    addq %r14, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    movq %rbx, %rax
-; CHECK-NEXT:    addq $168, %rsp
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    popq %r12
-; CHECK-NEXT:    popq %r13
-; CHECK-NEXT:    popq %r14
-; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:    retq
-
-entry:
-  %a64 = zext i32 %a to i64
-  %b64 = zext i32 %b to i64
-  %c64 = zext i32 %c to i64
-  %d64 = zext i32 %d to i64
-  %e64 = zext i32 %e to i64
-  %f64 = zext i32 %f to i64
-  %g64 = zext i32 %g to i64
-  %h64 = zext i32 %h to i64
-  %i64 = zext i32 %i to i64
-  %j64 = zext i32 %j to i64
-  %k64 = zext i32 %k to i64
-  %l64 = zext i32 %l to i64
-  %m64 = zext i32 %m to i64
-  %n64 = zext i32 %n to i64
-  %o64 = zext i32 %o to i64
-  %p64 = zext i32 %p to i64
-  %q64 = zext i32 %q to i64
-  %r64 = zext i32 %r to i64
-  %s64 = zext i32 %s to i64
-  %t64 = zext i32 %t to i64
-  %u64 = zext i32 %u to i64
-  %v64 = zext i32 %v to i64
-  %w64 = zext i32 %w to i64
-  %x64 = zext i32 %x to i64
-  %y64 = zext i32 %y to i64
-  %z64 = zext i32 %z to i64
-  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
-  %addab = add i64 %a64, %b64
-  %addc = add i64 %addab, %c64
-  %addd = add i64 %addc, %d64
-  %adde = add i64 %addd, %e64
-  %addf = add i64 %adde, %f64
-  %addg = add i64 %addf, %g64
-  %addh = add i64 %addg, %h64
-  %addi = add i64 %addh, %i64
-  %addj = add i64 %addi, %j64
-  %addk = add i64 %addj, %k64
-  %addl = add i64 %addk, %l64
-  %addm = add i64 %addl, %m64
-  %addn = add i64 %addm, %n64
-  %addo = add i64 %addn, %o64
-  %addp = add i64 %addo, %p64
-  %addq = add i64 %addp, %q64
-  %addr = add i64 %addq, %r64
-  %adds = add i64 %addr, %s64
-  %addt = add i64 %adds, %t64
-  %addu = add i64 %addt, %u64
-  %addv = add i64 %addu, %v64
-  %addw = add i64 %addv, %w64
-  %addx = add i64 %addw, %x64
-  %addy = add i64 %addx, %y64
-  %addz = add i64 %addy, %z64
-  ret i64 %addz
-}
-
-; Demonstrate address of a function (w/ spilling due to caller saved register is used)
-define void @addr_func() gc "statepoint-example" {
-; CHECK-LABEL: addr_func:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movq _bar@{{.*}}(%rip), %rax
-; CHECK-NEXT:    movq %rax, (%rsp) ## 8-byte Spill
-; CHECK-NEXT:    callq _bar ## 8-byte Folded Reload
-; CHECK-NEXT:  Ltmp15:
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
-entry:
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, void ()* @bar, void ()* @bar, void ()* @bar)
-  ret void
-}
-
-; Demonstrate address of a global (w/ spilling due to caller saved register is used)
-@G = external global i32
-define void @addr_global() gc "statepoint-example" {
-; CHECK-LABEL: addr_global:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movq _G@{{.*}}(%rip), %rax
-; CHECK-NEXT:    movq %rax, (%rsp) ## 8-byte Spill
-; CHECK-NEXT:    callq _bar ## 8-byte Folded Reload
-; CHECK-NEXT:  Ltmp16:
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
-entry:
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, i32* @G, i32* @G, i32* @G)
-  ret void
-}
-
-define void @addr_alloca(i32 %v) gc "statepoint-example" {
-; CHECK-LABEL: addr_alloca:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    callq _bar
-; CHECK-NEXT:  Ltmp17:
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
-entry:
-  %a = alloca i32
-  store i32 %v, i32* %a
-  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, i32* %a, i32* %a, i32* %a)
-  ret void
-}
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
-
-attributes #0 = { "deopt-lowering"="live-in" }
-attributes #1 = { "deopt-lowering"="live-through" }