[llvm] 4275eb1 - Re-land [Codegen/Statepoint] Allow usage of registers for non gc deopt values.
Serguei Katkov via llvm-commits
llvm-commits@lists.llvm.org
Thu Apr 9 20:13:59 PDT 2020
Author: Serguei Katkov
Date: 2020-04-10T10:13:39+07:00
New Revision: 4275eb13315fbc59b8976667b774de2a1510fc36
URL: https://github.com/llvm/llvm-project/commit/4275eb13315fbc59b8976667b774de2a1510fc36
DIFF: https://github.com/llvm/llvm-project/commit/4275eb13315fbc59b8976667b774de2a1510fc36.diff
LOG: Re-land [Codegen/Statepoint] Allow usage of registers for non gc deopt values.
The change introduces the usage of physical registers for non-gc deopt values.
This requires runtime support that knows how to read a value from a register.
By default the usage is off and can be switched on by an option.
The change also introduces an additional fixup pass which forces the spilling
of caller saved registers (clobbered by the call) and rewrites the statepoint
to use spill slots instead of caller saved registers.
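For illustration, a minimal llc invocation using the flags added in this
patch (test.ll here is a placeholder input module, not part of the commit):
  llc -use-registers-for-deopt-values test.ll
  llc -use-registers-for-deopt-values -fixup-scs-extend-slot-size test.ll
The second form additionally lets the fixup pass share spill slots across
different register sizes, reducing the total frame size.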
Reviewers: reames, dantrushin
Reviewed By: dantrushin
Subscribers: mgorny, hiraditya, mgrang, llvm-commits
Differential Revision: https://reviews.llvm.org/D77797
Added:
llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
llvm/test/CodeGen/X86/statepoint-regs.ll
Modified:
llvm/include/llvm/CodeGen/Passes.h
llvm/include/llvm/InitializePasses.h
llvm/lib/CodeGen/CMakeLists.txt
llvm/lib/CodeGen/CodeGen.cpp
llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
llvm/lib/CodeGen/TargetPassConfig.cpp
llvm/test/CodeGen/AArch64/O0-pipeline.ll
llvm/test/CodeGen/AArch64/O3-pipeline.ll
llvm/test/CodeGen/ARM/O3-pipeline.ll
llvm/test/CodeGen/X86/O0-pipeline.ll
llvm/test/CodeGen/X86/O3-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 315cbf6ad099..2d76def4f2d4 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -478,6 +478,10 @@ namespace llvm {
/// Creates MIR Strip Debug pass. \see MachineStripDebug.cpp
ModulePass *createStripDebugMachineModulePass();
+
+ /// This pass fixes up statepoint machine instructions to replace uses of
+ /// caller saved registers with stack slots.
+ extern char &FixupStatepointCallerSavedID;
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 21adf2e70db8..b39a497522d1 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -152,6 +152,7 @@ void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
void initializeFinalizeISelPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
+void initializeFixupStatepointCallerSavedPass(PassRegistry&);
void initializeFlattenCFGPassPass(PassRegistry&);
void initializeFloat2IntLegacyPassPass(PassRegistry&);
void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index ad7458fb903a..fd9e6f210061 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -30,6 +30,7 @@ add_llvm_component_library(LLVMCodeGen
FaultMaps.cpp
FEntryInserter.cpp
FinalizeISel.cpp
+ FixupStatepointCallerSaved.cpp
FuncletLayout.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 69f6a7d6cb9e..7a8c022c82da 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -39,6 +39,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeFEntryInserterPass(Registry);
initializeFinalizeISelPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
+ initializeFixupStatepointCallerSavedPass(Registry);
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
new file mode 100644
index 000000000000..29478e9eeec9
--- /dev/null
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -0,0 +1,310 @@
+//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The deopt parameters of a statepoint instruction contain values which are
+/// meaningful to the runtime and must be readable at the moment the call
+/// returns. So we need to encode the fact that these values are read "late"
+/// by the runtime. If we could express this notion to the register allocator,
+/// it would produce the right form for us.
+/// The need for a fixup (i.e. this pass) arises specifically because we
+/// cannot describe such a late read to the register allocator, which may
+/// therefore put a value in a register that is clobbered by the call.
+/// This pass forces the spill of such registers and rewrites the
+/// corresponding statepoint operands to refer to the added spill slots.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "fixup-statepoint-caller-saved"
+STATISTIC(NumSpilledRegisters, "Number of spilled registers");
+STATISTIC(NumSpillSlotsAllocated, "Number of spill slots allocated");
+STATISTIC(NumSpillSlotsExtended, "Number of spill slots extended");
+
+static cl::opt<bool> FixupSCSExtendSlotSize(
+ "fixup-scs-extend-slot-size", cl::Hidden, cl::init(false),
+ cl::desc("Allow spilling in a spill slot of greater size than register size"));
+
+namespace {
+
+class FixupStatepointCallerSaved : public MachineFunctionPass {
+public:
+ static char ID;
+
+ FixupStatepointCallerSaved() : MachineFunctionPass(ID) {
+ initializeFixupStatepointCallerSavedPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "Fixup Statepoint Caller Saved";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char FixupStatepointCallerSaved::ID = 0;
+char &llvm::FixupStatepointCallerSavedID = FixupStatepointCallerSaved::ID;
+
+INITIALIZE_PASS_BEGIN(FixupStatepointCallerSaved, DEBUG_TYPE,
+ "Fixup Statepoint Caller Saved", false, false)
+INITIALIZE_PASS_END(FixupStatepointCallerSaved, DEBUG_TYPE,
+ "Fixup Statepoint Caller Saved", false, false)
+
+// Utility function to get the spill slot size of a register.
+static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) {
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ return TRI.getSpillSize(*RC);
+}
+
+// Cache of frame indexes used during statepoint rewriting, so that they can
+// be reused when processing the next statepoint instruction.
+// There are two strategies: one preserves the size of each spill slot, while
+// the other extends spill slots so they can be shared across sizes, reducing
+// their number and hence the total frame size. With the latter, reloading
+// from a larger slot is an implicit any-extend of the spilled value.
+class FrameIndexesCache {
+private:
+ struct FrameIndexesPerSize {
+ // Frame indexes used while processing previous statepoints.
+ SmallVector<int, 8> Slots;
+ // Index of the first not-yet-used frame index in Slots.
+ unsigned Index = 0;
+ };
+ MachineFrameInfo &MFI;
+ const TargetRegisterInfo &TRI;
+ // Map from a size to the list of frame indexes of that size. If the mode
+ // is FixupSCSExtendSlotSize then the key 0 is used to keep all frame
+ // indexes. If the size of the required spill slot is greater than the
+ // cached one, the slot size will be increased.
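+ // Illustrative example (not normative): if statepoint A spills an 8-byte
+ // register and a later statepoint B spills a 4-byte one, the
+ // FixupSCSExtendSlotSize mode lets B reuse A's 8-byte slot from bucket 0,
+ // while the default mode allocates a fresh 4-byte slot in bucket 4.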
+ DenseMap<unsigned, FrameIndexesPerSize> Cache;
+
+public:
+ FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI)
+ : MFI(MFI), TRI(TRI) {}
+ // Reset the current state of used frame indexes. After invocation of
+ // this function all frame indexes are available for allocation.
+ void reset() {
+ for (auto &It : Cache)
+ It.second.Index = 0;
+ }
+ // Get frame index to spill the register.
+ int getFrameIndex(Register Reg) {
+ unsigned Size = getRegisterSize(TRI, Reg);
+ // In FixupSCSExtendSlotSize mode the bucket with 0 index is used
+ // for all sizes.
+ unsigned Bucket = FixupSCSExtendSlotSize ? 0 : Size;
+ FrameIndexesPerSize &Line = Cache[Bucket];
+ if (Line.Index < Line.Slots.size()) {
+ int FI = Line.Slots[Line.Index++];
+ // If all sizes are kept together we probably need to extend the
+ // spill slot size.
+ if (MFI.getObjectSize(FI) < Size) {
+ MFI.setObjectSize(FI, Size);
+ MFI.setObjectAlignment(FI, Align(Size));
+ NumSpillSlotsExtended++;
+ }
+ return FI;
+ }
+ int FI = MFI.CreateSpillStackObject(Size, Size);
+ NumSpillSlotsAllocated++;
+ Line.Slots.push_back(FI);
+ ++Line.Index;
+ return FI;
+ }
+ // Sort all registers to spill in descending order of size. In
+ // FixupSCSExtendSlotSize mode this minimizes the total frame size.
+ // In the default mode this step can be skipped.
+ void sortRegisters(SmallVectorImpl<Register> &Regs) {
+ if (!FixupSCSExtendSlotSize)
+ return;
+ llvm::sort(Regs.begin(), Regs.end(), [&](Register &A, Register &B) {
+ return getRegisterSize(TRI, A) > getRegisterSize(TRI, B);
+ });
+ }
+};
+
+// Describes the state of the statepoint instruction currently being processed.
+class StatepointState {
+private:
+ // The statepoint instruction being processed.
+ MachineInstr &MI;
+ MachineFunction &MF;
+ const TargetRegisterInfo &TRI;
+ const TargetInstrInfo &TII;
+ MachineFrameInfo &MFI;
+ // Call-preserved register mask; a set bit means the register is callee
+ // saved (preserved across the call).
+ const uint32_t *Mask;
+ // Cache of frame indexes used while processing previous statepoints.
+ FrameIndexesCache &CacheFI;
+ // Operand indexes of physical registers that require spilling.
+ SmallVector<unsigned, 8> OpsToSpill;
+ // Registers to spill.
+ SmallVector<Register, 8> RegsToSpill;
+ // Map from register to its frame slot index.
+ DenseMap<Register, int> RegToSlotIdx;
+
+public:
+ StatepointState(MachineInstr &MI, const uint32_t *Mask,
+ FrameIndexesCache &CacheFI)
+ : MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()),
+ TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()),
+ Mask(Mask), CacheFI(CacheFI) {}
+ // Return true if register is callee saved.
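+ // (E.g. for Reg == 70 this tests bit 6 of Mask[2].)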
+ bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; }
+ // Iterates over statepoint meta args to find caller saved registers.
+ // Also records the operand indexes of the found registers.
+ // Returns true if any caller saved registers were found.
+ bool findRegistersToSpill() {
+ SmallSet<Register, 8> VisitedRegs;
+ for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
+ EndIdx = MI.getNumOperands();
+ Idx < EndIdx; ++Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ Register Reg = MO.getReg();
+ assert(Reg.isPhysical() && "Only physical regs are expected");
+ if (isCalleeSaved(Reg))
+ continue;
+ if (VisitedRegs.insert(Reg).second)
+ RegsToSpill.push_back(Reg);
+ OpsToSpill.push_back(Idx);
+ }
+ CacheFI.sortRegisters(RegsToSpill);
+ return !RegsToSpill.empty();
+ }
+ // Spill all caller saved registers right before statepoint instruction.
+ // Remember frame index where register is spilled.
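+ // (E.g. a deopt value living in the caller saved $r11d gets a 4-byte store
+ // to its stack slot emitted immediately before the STATEPOINT.)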
+ void spillRegisters() {
+ for (Register Reg : RegsToSpill) {
+ int FI = CacheFI.getFrameIndex(Reg);
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI,
+ RC, &TRI);
+ NumSpilledRegisters++;
+ RegToSlotIdx[Reg] = FI;
+ }
+ }
+ // Rewrite the statepoint machine instruction to replace caller saved
+ // register operands with indirect memory locations (frame indexes).
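+ // (E.g. a 'killed $r11d' operand becomes the operand sequence
+ // 'StackMaps::IndirectMemRefOp, 4, <its frame index>, 0'.)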
+ void rewriteStatepoint() {
+ MachineInstr *NewMI =
+ MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+
+ // Add End marker.
+ OpsToSpill.push_back(MI.getNumOperands());
+ unsigned CurOpIdx = 0;
+
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (I == OpsToSpill[CurOpIdx]) {
+ assert(MO.isReg() && "Should be register");
+ assert(MO.getReg().isPhysical() && "Should be physical register");
+ int FI = RegToSlotIdx[MO.getReg()];
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(getRegisterSize(TRI, MO.getReg()));
+ MIB.addFrameIndex(FI);
+ MIB.addImm(0);
+ ++CurOpIdx;
+ } else
+ MIB.add(MO);
+ }
+ assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed");
+ // Add mem operands.
+ NewMI->setMemRefs(MF, MI.memoperands());
+ for (auto It : RegToSlotIdx) {
+ int FrameIndex = It.second;
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ getRegisterSize(TRI, It.first),
+ MFI.getObjectAlign(FrameIndex));
+ NewMI->addMemOperand(MF, MMO);
+ }
+ // Insert new statepoint and erase old one.
+ MI.getParent()->insert(MI, NewMI);
+ MI.eraseFromParent();
+ }
+};
+
+class StatepointProcessor {
+private:
+ MachineFunction &MF;
+ const TargetRegisterInfo &TRI;
+ FrameIndexesCache CacheFI;
+
+public:
+ StatepointProcessor(MachineFunction &MF)
+ : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()),
+ CacheFI(MF.getFrameInfo(), TRI) {}
+
+ bool process(MachineInstr &MI) {
+ unsigned VarIdx = StatepointOpers(&MI).getVarIdx();
+ uint64_t Flags =
+ MI.getOperand(VarIdx + StatepointOpers::FlagsOffset).getImm();
+ // Do nothing for LiveIn; it supports all registers.
+ if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn)
+ return false;
+ CallingConv::ID CC =
+ MI.getOperand(VarIdx + StatepointOpers::CCOffset).getImm();
+ const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
+ CacheFI.reset();
+ StatepointState SS(MI, Mask, CacheFI);
+
+ if (!SS.findRegistersToSpill())
+ return false;
+
+ SS.spillRegisters();
+ SS.rewriteStatepoint();
+ return true;
+ }
+};
+
+bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const Function &F = MF.getFunction();
+ if (!F.hasGC())
+ return false;
+
+ SmallVector<MachineInstr *, 16> Statepoints;
+ for (MachineBasicBlock &BB : MF)
+ for (MachineInstr &I : BB)
+ if (I.getOpcode() == TargetOpcode::STATEPOINT)
+ Statepoints.push_back(&I);
+
+ if (Statepoints.empty())
+ return false;
+
+ bool Changed = false;
+ StatepointProcessor SPP(MF);
+ for (MachineInstr *I : Statepoints)
+ Changed |= SPP.process(*I);
+ return Changed;
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index ffe35637af28..5c26b11397b9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -61,6 +61,10 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
STATISTIC(StatepointMaxSlotsRequired,
"Maximum number of stack slots required for a singe statepoint");
+cl::opt<bool> UseRegistersForDeoptValues(
+ "use-registers-for-deopt-values", cl::Hidden, cl::init(false),
+ cl::desc("Allow using registers for non pointer deopt args"));
+
static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
SelectionDAGBuilder &Builder, uint64_t Value) {
SDLoc L = Builder.getCurSDLoc();
@@ -409,7 +413,8 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
// end up folding some of these into stack references, but they'll be
// handled by the register allocator. Note that we do not have the notion
// of a late use so these values might be placed in registers which are
- // clobbered by the call. This is fine for live-in.
+ // clobbered by the call. This is fine for live-in. For live-through, the
+ // fixup pass must be executed to force spilling of such registers.
Ops.push_back(Incoming);
} else {
// Otherwise, locate a spill slot and explicitly spill it so it
@@ -494,7 +499,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
};
auto requireSpillSlot = [&](const Value *V) {
- return !LiveInDeopt || isGCValue(V);
+ return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
};
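+ // E.g. with -use-registers-for-deopt-values, a non-pointer deopt arg no
+ // longer forces a spill slot here; the post-RA fixup pass spills it later
+ // if it ends up in a caller saved register.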
// Before we actually start lowering (and allocating spill slots for values),
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index a351efc6659d..44c326ecba8f 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -911,6 +911,8 @@ void TargetPassConfig::addMachinePasses() {
// Run post-ra passes.
addPostRegAlloc();
+ addPass(&FixupStatepointCallerSavedID);
+
// Insert prolog/epilog code. Eliminate abstract frame index references...
if (getOptLevel() != CodeGenOpt::None) {
addPass(&PostRAMachineSinkingID);
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index f9613d40d6bd..6d6ccea1e2ab 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -56,6 +56,7 @@
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
+; CHECK-NEXT: Fixup Statepoint Caller Saved
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 65b88ee0aa99..1ae3be1de9d1 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -146,6 +146,7 @@
; CHECK-NEXT: Machine Loop Invariant Code Motion
; CHECK-NEXT: AArch64 Redundant Copy Elimination
; CHECK-NEXT: A57 FP Anti-dependency breaker
+; CHECK-NEXT: Fixup Statepoint Caller Saved
; CHECK-NEXT: PostRA Machine Sink
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index cfe15a72a846..31a437d90144 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -122,9 +122,11 @@
; CHECK-NEXT: Stack Slot Coloring
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: Machine Loop Invariant Code Motion
+; CHECK-NEXT: Fixup Statepoint Caller Saved
; CHECK-NEXT: PostRA Machine Sink
-; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: MachinePostDominator Tree Construction
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 9af81b77a70b..b7a2974cd40f 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -55,6 +55,7 @@
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Bundle Machine CFG Edges
; CHECK-NEXT: X86 FP Stackifier
+; CHECK-NEXT: Fixup Statepoint Caller Saved
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll
index 97ce5082d4f7..64cb129a7eb9 100644
--- a/llvm/test/CodeGen/X86/O3-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -139,9 +139,11 @@
; CHECK-NEXT: Machine Loop Invariant Code Motion
; CHECK-NEXT: Bundle Machine CFG Edges
; CHECK-NEXT: X86 FP Stackifier
+; CHECK-NEXT: Fixup Statepoint Caller Saved
; CHECK-NEXT: PostRA Machine Sink
-; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: MachinePostDominator Tree Construction
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
diff --git a/llvm/test/CodeGen/X86/statepoint-regs.ll b/llvm/test/CodeGen/X86/statepoint-regs.ll
new file mode 100644
index 000000000000..e3333bd51ab7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/statepoint-regs.ll
@@ -0,0 +1,679 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare void @bar() #0
+declare void @baz()
+
+; Spill the caller saved register holding %a.
+define void @test1(i32 %a) gc "statepoint-example" {
+; CHECK-LABEL: test1:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: callq _bar ## 4-byte Folded Reload
+; CHECK-NEXT: Ltmp0:
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+entry:
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a)
+ ret void
+}
+
+; Callee saved registers are ok.
+define void @test2(i32 %a, i32 %b) gc "statepoint-example" {
+; CHECK-LABEL: test2:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp1:
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp2:
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 2, i32 %a, i32 %b)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 2, i32 %b, i32 %a)
+ ret void
+}
+
+; Arguments in caller saved registers, so they must be spilled.
+define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) gc "statepoint-example" {
+; CHECK-LABEL: test3:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %r9d, (%rsp) ## 4-byte Spill
+; CHECK-NEXT: callq _bar ## 24-byte Folded Reload
+; CHECK-NEXT: Ltmp3:
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+entry:
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 9, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i)
+ ret void
+}
+
+; This case just confirms that we don't crash when given more live values
+; than registers. This is a case where we *have* to use a stack slot. This
+; also ends up being a good test of whether we can fold loads from immutable
+; stack slots into the statepoint.
+define void @test4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; CHECK-LABEL: test4:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %r9d, (%rsp) ## 4-byte Spill
+; CHECK-NEXT: callq _bar ## 24-byte Folded Reload
+; CHECK-NEXT: Ltmp4:
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+entry:
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
+ ret void
+}
+
+; A gc-value must be spilled even if it is also a deopt value.
+define i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: test5:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp5:
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp6:
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+entry:
+ %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p)
+ %p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 9, i32 9)
+ %token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p2, i32 addrspace(1)* %p2)
+ %p3 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token2, i32 9, i32 9)
+ ret i32 addrspace(1)* %p3
+}
+
+; Callee saved registers are ok again.
+define void @test6(i32 %a) gc "statepoint-example" {
+; CHECK-LABEL: test6:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: callq _baz
+; CHECK-NEXT: Ltmp7:
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp8:
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+entry:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 %a)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a)
+ ret void
+}
+
+; Many deopt values.
+define void @test7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; The code for this is terrible; for the moment, check simply for correctness.
+; CHECK-LABEL: test7:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $168, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 224
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl %edi, %edi
+; CHECK-NEXT: movl %esi, %esi
+; CHECK-NEXT: movl %edx, %edx
+; CHECK-NEXT: movl %ecx, %ecx
+; CHECK-NEXT: movl %r8d, %r8d
+; CHECK-NEXT: movl %r9d, %r9d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r12d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r14d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: callq _bar ## 160-byte Folded Reload
+; CHECK-NEXT: Ltmp9:
+; CHECK-NEXT: addq $168, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ %a64 = zext i32 %a to i64
+ %b64 = zext i32 %b to i64
+ %c64 = zext i32 %c to i64
+ %d64 = zext i32 %d to i64
+ %e64 = zext i32 %e to i64
+ %f64 = zext i32 %f to i64
+ %g64 = zext i32 %g to i64
+ %h64 = zext i32 %h to i64
+ %i64 = zext i32 %i to i64
+ %j64 = zext i32 %j to i64
+ %k64 = zext i32 %k to i64
+ %l64 = zext i32 %l to i64
+ %m64 = zext i32 %m to i64
+ %n64 = zext i32 %n to i64
+ %o64 = zext i32 %o to i64
+ %p64 = zext i32 %p to i64
+ %q64 = zext i32 %q to i64
+ %r64 = zext i32 %r to i64
+ %s64 = zext i32 %s to i64
+ %t64 = zext i32 %t to i64
+ %u64 = zext i32 %u to i64
+ %v64 = zext i32 %v to i64
+ %w64 = zext i32 %w to i64
+ %x64 = zext i32 %x to i64
+ %y64 = zext i32 %y to i64
+ %z64 = zext i32 %z to i64
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
+ ret void
+}
+
+; a variant of test7 with mixed types chosen to exercise register aliases
+define void @test8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; The code for this is terrible; for the moment, check simply for correctness.
+; CHECK-LABEL: test8:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $104, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 160
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %r10d
+; CHECK-NEXT: movl %r8d, %r9d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r12d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r14d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
+; CHECK-NEXT: movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
+; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
+; CHECK-NEXT: movb %cl, (%rsp) ## 1-byte Spill
+; CHECK-NEXT: movw %r9w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movw %r10w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: callq _bar ## 104-byte Folded Reload
+; CHECK-NEXT: Ltmp10:
+; CHECK-NEXT: addq $104, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ %a8 = trunc i32 %a to i8
+ %b8 = trunc i32 %b to i8
+ %c8 = trunc i32 %c to i8
+ %d8 = trunc i32 %d to i8
+ %e16 = trunc i32 %e to i16
+ %f16 = trunc i32 %f to i16
+ %g16 = trunc i32 %g to i16
+ %h16 = trunc i32 %h to i16
+ %i64 = zext i32 %i to i64
+ %j64 = zext i32 %j to i64
+ %k64 = zext i32 %k to i64
+ %l64 = zext i32 %l to i64
+ %m64 = zext i32 %m to i64
+ %n64 = zext i32 %n to i64
+ %o64 = zext i32 %o to i64
+ %p64 = zext i32 %p to i64
+ %q64 = zext i32 %q to i64
+ %r64 = zext i32 %r to i64
+ %s64 = zext i32 %s to i64
+ %t64 = zext i32 %t to i64
+ %u64 = zext i32 %u to i64
+ %v64 = zext i32 %v to i64
+ %w64 = zext i32 %w to i64
+ %x64 = zext i32 %x to i64
+ %y64 = zext i32 %y to i64
+ %z64 = zext i32 %z to i64
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i8 %a8, i8 %b8, i8 %c8, i8 %d8, i16 %e16, i16 %f16, i16 %g16, i16 %h16, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
+ ret void
+}
+
+; Test perfect forwarding of argument registers and stack slots to the
+; deopt bundle uses
+define void @test9(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; CHECK-LABEL: test9:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl %r9d, (%rsp) ## 4-byte Spill
+; CHECK-NEXT: callq _bar ## 24-byte Folded Reload
+; CHECK-NEXT: Ltmp11:
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+
+entry:
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
+ ret void
+}
+
+; Test enough folding of argument slots when we have one call which clobbers
+; registers before a second which needs them - i.e. we must do something with
+; arguments originally passed in registers
+define void @test10(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; FIXME (minor): It would be better to just spill (and fold the reload) for
+; argument registers than to spill and fill all the CSRs.
+; CHECK-LABEL: test10:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %r15d
+; CHECK-NEXT: movl %r8d, %r14d
+; CHECK-NEXT: movl %ecx, %r12d
+; CHECK-NEXT: movl %edx, %r13d
+; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp12:
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp13:
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+
+entry:
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
+ %statepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
+ ret void
+}
+
+; Check that we can remat some uses of a def despite not remating before the
+; statepoint user.
+define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; FIXME: The codegen for this is correct, but horrible. Lots of room for
+; improvement if we so desire.
+; CHECK-LABEL: test11:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $168, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 224
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movl %esi, %r15d
+; CHECK-NEXT: movl %edx, %r12d
+; CHECK-NEXT: movl %ecx, %r13d
+; CHECK-NEXT: movl %r8d, %ebp
+; CHECK-NEXT: movl %r9d, %r14d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: callq _bar ## 160-byte Folded Reload
+; CHECK-NEXT: Ltmp14:
+; CHECK-NEXT: addq %r15, %rbx
+; CHECK-NEXT: addq %r12, %rbx
+; CHECK-NEXT: addq %r13, %rbx
+; CHECK-NEXT: addq %rbp, %rbx
+; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: addq $168, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+
+entry:
+ %a64 = zext i32 %a to i64
+ %b64 = zext i32 %b to i64
+ %c64 = zext i32 %c to i64
+ %d64 = zext i32 %d to i64
+ %e64 = zext i32 %e to i64
+ %f64 = zext i32 %f to i64
+ %g64 = zext i32 %g to i64
+ %h64 = zext i32 %h to i64
+ %i64 = zext i32 %i to i64
+ %j64 = zext i32 %j to i64
+ %k64 = zext i32 %k to i64
+ %l64 = zext i32 %l to i64
+ %m64 = zext i32 %m to i64
+ %n64 = zext i32 %n to i64
+ %o64 = zext i32 %o to i64
+ %p64 = zext i32 %p to i64
+ %q64 = zext i32 %q to i64
+ %r64 = zext i32 %r to i64
+ %s64 = zext i32 %s to i64
+ %t64 = zext i32 %t to i64
+ %u64 = zext i32 %u to i64
+ %v64 = zext i32 %v to i64
+ %w64 = zext i32 %w to i64
+ %x64 = zext i32 %x to i64
+ %y64 = zext i32 %y to i64
+ %z64 = zext i32 %z to i64
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
+ %addab = add i64 %a64, %b64
+ %addc = add i64 %addab, %c64
+ %addd = add i64 %addc, %d64
+ %adde = add i64 %addd, %e64
+ %addf = add i64 %adde, %f64
+ %addg = add i64 %addf, %g64
+ %addh = add i64 %addg, %h64
+ %addi = add i64 %addh, %i64
+ %addj = add i64 %addi, %j64
+ %addk = add i64 %addj, %k64
+ %addl = add i64 %addk, %l64
+ %addm = add i64 %addl, %m64
+ %addn = add i64 %addm, %n64
+ %addo = add i64 %addn, %o64
+ %addp = add i64 %addo, %p64
+ %addq = add i64 %addp, %q64
+ %addr = add i64 %addq, %r64
+ %adds = add i64 %addr, %s64
+ %addt = add i64 %adds, %t64
+ %addu = add i64 %addt, %u64
+ %addv = add i64 %addu, %v64
+ %addw = add i64 %addv, %w64
+ %addx = add i64 %addw, %x64
+ %addy = add i64 %addx, %y64
+ %addz = add i64 %addy, %z64
+ ret i64 %addz
+}
+
+; Demonstrate address of a function (w/ spilling because a caller saved register is used)
+define void @addr_func() gc "statepoint-example" {
+; CHECK-LABEL: addr_func:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq _bar@{{.*}}(%rip), %rax
+; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill
+; CHECK-NEXT: callq _bar ## 8-byte Folded Reload
+; CHECK-NEXT: Ltmp15:
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+entry:
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, void ()* @bar, void ()* @bar, void ()* @bar)
+ ret void
+}
+
+; Demonstrate address of a global (w/ spilling because a caller saved register is used)
+@G = external global i32
+define void @addr_global() gc "statepoint-example" {
+; CHECK-LABEL: addr_global:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq _G@{{.*}}(%rip), %rax
+; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill
+; CHECK-NEXT: callq _bar ## 8-byte Folded Reload
+; CHECK-NEXT: Ltmp16:
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+entry:
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, i32* @G, i32* @G, i32* @G)
+ ret void
+}
+
+define void @addr_alloca(i32 %v) gc "statepoint-example" {
+; CHECK-LABEL: addr_alloca:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp17:
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+entry:
+ %a = alloca i32
+ store i32 %v, i32* %a
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, i32* %a, i32* %a, i32* %a)
+ ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
+
+attributes #0 = { "deopt-lowering"="live-in" }
+attributes #1 = { "deopt-lowering"="live-through" }