[llvm] 0a950a2 - [SystemZ/z/OS] Implement save of non-volatile registers on z/OS XPLINK
Kai Nacke via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 13 09:59:20 PDT 2021
Author: Kai Nacke
Date: 2021-10-13T12:57:57-04:00
New Revision: 0a950a2e94f2f5f1596a9c8af44d3bbd26497927
URL: https://github.com/llvm/llvm-project/commit/0a950a2e94f2f5f1596a9c8af44d3bbd26497927
DIFF: https://github.com/llvm/llvm-project/commit/0a950a2e94f2f5f1596a9c8af44d3bbd26497927.diff
LOG: [SystemZ/z/OS] Implement save of non-volatile registers on z/OS XPLINK
This PR implements saving the XPLINK callee-saved registers
on z/OS.
Reviewed By: uweigand, Kai
Differential Revision: https://reviews.llvm.org/D111653
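For illustration (taken from the new test below), a small non-leaf function
such as

    define void @func0() {
      call i64 (i64) @fun(i64 10)
      ret void
    }

compiled with llc -mtriple=s390x-ibm-zos -mcpu=z13 is now expected to save
the entry-point and return registers in its prolog, starting with
stmg 6, 7 (see zos-prologue-epilog.ll below).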
Added:
llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
Modified:
llvm/lib/Target/SystemZ/SystemZCallingConv.td
llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
llvm/lib/Target/SystemZ/SystemZFrameLowering.h
llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
llvm/lib/Target/SystemZ/SystemZSubtarget.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index 45e22b07be300..c606e78b69b6c 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -162,12 +162,14 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
//===----------------------------------------------------------------------===//
// z/OS XPLINK64 callee-saved registers
//===----------------------------------------------------------------------===//
-def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
- (sequence "F%dD", 8, 15))>;
-
-def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
- (sequence "F%dD", 15, 8),
- (sequence "V%d", 23, 16))>;
+// %R7D is volatile by the spec, but any non-leaf function must save it in
+// the prologue and restore it in the epilogue for use by the return
+// instruction, so it functions exactly like a callee-saved register.
+def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
+ (sequence "F%dD", 15, 8))>;
+
+def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,
+ (sequence "V%d", 23, 16))>;
//===----------------------------------------------------------------------===//
// z/OS XPLINK64 return value calling convention
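For orientation, the XPLINKSpillOffsetTable added to
SystemZFrameLowering.cpp below assigns the GPR save slots as follows
(byte offsets as given in the table):

    0x00 r4   0x18 r7   0x30 r10   0x48 r13
    0x08 r5   0x20 r8   0x38 r11   0x50 r14
    0x10 r6   0x28 r9   0x40 r12   0x58 r15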
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 83f05e55226e2..d11d118fb8ee9 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
namespace {
// The ABI-defined register save slots, relative to the CFA (i.e.
// incoming stack pointer + SystemZMC::ELFCallFrameSize).
-static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
+static const TargetFrameLowering::SpillSlot ELFSpillOffsetTable[] = {
{ SystemZ::R2D, 0x10 },
{ SystemZ::R3D, 0x18 },
{ SystemZ::R4D, 0x20 },
@@ -44,6 +44,12 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
{ SystemZ::F4D, 0x90 },
{ SystemZ::F6D, 0x98 }
};
+
+static const TargetFrameLowering::SpillSlot XPLINKSpillOffsetTable[] = {
+ {SystemZ::R4D, 0x00}, {SystemZ::R5D, 0x08}, {SystemZ::R6D, 0x10},
+ {SystemZ::R7D, 0x18}, {SystemZ::R8D, 0x20}, {SystemZ::R9D, 0x28},
+ {SystemZ::R10D, 0x30}, {SystemZ::R11D, 0x38}, {SystemZ::R12D, 0x40},
+ {SystemZ::R13D, 0x48}, {SystemZ::R14D, 0x50}, {SystemZ::R15D, 0x58}};
} // end anonymous namespace
SystemZFrameLowering::SystemZFrameLowering(StackDirection D, Align StackAl,
@@ -201,8 +207,9 @@ void SystemZELFFrameLowering::determineCalleeSaves(MachineFunction &MF,
SystemZELFFrameLowering::SystemZELFFrameLowering()
: SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0,
- Align(8), false /* StackRealignable */),
+ Align(8), /* StackRealignable */ false),
RegSpillOffsets(0) {
+
// Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
// equal to the incoming stack pointer, but to incoming stack pointer plus
// 160. Instead of using a Local Area Offset, the Register save area will
@@ -212,8 +219,8 @@ SystemZELFFrameLowering::SystemZELFFrameLowering()
// Create a mapping from register number to save slot offset.
// These offsets are relative to the start of the register save area.
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
- for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
- RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
+ for (unsigned I = 0, E = array_lengthof(ELFSpillOffsetTable); I != E; ++I)
+ RegSpillOffsets[ELFSpillOffsetTable[I].Reg] = ELFSpillOffsetTable[I].Offset;
}
// Add GPR64 to the save instruction being built by MIB, which is in basic
@@ -812,7 +819,176 @@ bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
: SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32), 128,
- Align(32), false /* StackRealignable */) {}
+ Align(32), /* StackRealignable */ false),
+ RegSpillOffsets(-1) {
+
+ // Create a mapping from register number to save slot offset.
+  // These offsets are relative to the start of the local area.
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+ for (unsigned I = 0, E = array_lengthof(XPLINKSpillOffsetTable); I != E; ++I)
+ RegSpillOffsets[XPLINKSpillOffsetTable[I].Reg] =
+ XPLINKSpillOffsetTable[I].Offset;
+}
+
+bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // Scan the call-saved GPRs and find the bounds of the register spill area.
+ unsigned LowGPR = 0;
+ int LowOffset = INT32_MAX;
+ unsigned HighGPR = LowGPR;
+ int HighOffset = -1;
+
+ unsigned RegSP = Regs.getStackPointerRegister();
+ auto &GRRegClass = SystemZ::GR64BitRegClass;
+ const unsigned RegSize = 8;
+
+ auto ProcessCSI = [&](std::vector<CalleeSavedInfo> &CSIList) {
+ for (auto &CS : CSIList) {
+ unsigned Reg = CS.getReg();
+ int Offset = RegSpillOffsets[Reg];
+ if (Offset >= 0) {
+ if (GRRegClass.contains(Reg)) {
+ if (LowOffset > Offset) {
+ LowOffset = Offset;
+ LowGPR = Reg;
+ }
+
+ if (Offset > HighOffset) {
+ HighOffset = Offset;
+ HighGPR = Reg;
+ }
+ }
+ int FrameIdx = MFFrame.CreateFixedSpillStackObject(RegSize, Offset);
+ CS.setFrameIdx(FrameIdx);
+ } else
+ CS.setFrameIdx(INT32_MAX);
+ }
+ };
+
+ std::vector<CalleeSavedInfo> Spills;
+
+ // For non-leaf functions:
+ // - the address of callee (entry point) register R6 must be saved
+ Spills.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
+
+ // If the function needs a frame pointer, or if the backchain pointer should
+ // be stored, then save the stack pointer register R4.
+ if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain"))
+ Spills.push_back(CalleeSavedInfo(RegSP));
+
+ // Save the range of call-saved registers, for use by the
+ // prologue/epilogue inserters.
+ ProcessCSI(CSI);
+ MFI->setRestoreGPRRegs(LowGPR, HighGPR, LowOffset);
+
+ // Save the range of call-saved registers, for use by the epilogue inserter.
+ ProcessCSI(Spills);
+ MFI->setSpillGPRRegs(LowGPR, HighGPR, LowOffset);
+
+ // Create spill slots for the remaining registers.
+ for (auto &CS : CSI) {
+ if (CS.getFrameIdx() != INT32_MAX)
+ continue;
+ unsigned Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ Align Alignment = TRI->getSpillAlign(*RC);
+ unsigned Size = TRI->getSpillSize(*RC);
+ Alignment = std::min(Alignment, getStackAlign());
+ int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
+ CS.setFrameIdx(FrameIdx);
+ }
+
+ return true;
+}
+
+void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ bool HasFP = hasFP(MF);
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // If the function requires a frame pointer, record that the hard
+ // frame pointer will be clobbered.
+ if (HasFP)
+ SavedRegs.set(Regs.getFramePointerRegister());
+
+ // If the function is not an XPLeaf function, we need to save the
+ // return address register. We also always use that register for
+ // the return instruction, so it needs to be restored in the
+ // epilogue even though that register is considered to be volatile.
+ // #TODO: Implement leaf detection.
+ SavedRegs.set(Regs.getReturnFunctionAddressRegister());
+}
+
+bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction &MF = *MBB.getParent();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
+ DebugLoc DL;
+
+ // Save GPRs
+ if (SpillGPRs.LowGPR) {
+ assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
+ "Should be saving multiple registers");
+
+ // Build an STM/STMG instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
+
+ // Add the explicit register operands.
+ addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
+ addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
+
+ // Add the address r4
+ MIB.addReg(Regs.getStackPointerRegister());
+
+ // Add the partial offset
+    // We cannot add the actual offset, as the stack is not yet finalized
+ MIB.addImm(SpillGPRs.GPROffset);
+
+ // Make sure all call-saved GPRs are included as operands and are
+ // marked as live on entry.
+ auto &GRRegClass = SystemZ::GR64BitRegClass;
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (GRRegClass.contains(Reg))
+ addSavedGPR(MBB, MIB, Reg, true);
+ }
+ }
+
+ // Spill FPRs to the stack in the normal TargetInstrInfo way
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ }
+ if (SystemZ::VR128BitRegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ &SystemZ::VR128BitRegClass, TRI);
+ }
+ }
+
+ return true;
+}
void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {}
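As a usage note (a sketch; the function name is hypothetical), marking a
function with the "backchain" attribute,

    ; hypothetical example, not part of this patch
    define void @f() "backchain" {
      ret void
    }

makes assignCalleeSavedSpillSlots above add the stack pointer register r4
(save slot 0x00) to the spill list, so it is included in the range covered
by the prolog's stmg.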
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 9bef45fdc120d..6fddb4f81c416 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -10,6 +10,8 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZMachineFunctionInfo.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/TypeSize.h"
@@ -19,7 +21,6 @@ class SystemZTargetMachine;
class SystemZSubtarget;
class SystemZFrameLowering : public TargetFrameLowering {
-
public:
SystemZFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl,
bool StackReal);
@@ -86,9 +87,24 @@ class SystemZELFFrameLowering : public SystemZFrameLowering {
};
class SystemZXPLINKFrameLowering : public SystemZFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+
public:
SystemZXPLINKFrameLowering();
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 0062e39602f56..48cec176b0069 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -190,7 +190,9 @@ bool SystemZRegisterInfo::getRegAllocationHints(
const MCPhysReg *
SystemZXPLINK64Registers::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_SystemZ_XPLINK64_SaveList;
+ const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
+ return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_SaveList
+ : CSR_SystemZ_XPLINK64_SaveList;
}
const MCPhysReg *
@@ -211,7 +213,9 @@ SystemZELFRegisters::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t *
SystemZXPLINK64Registers::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
- return CSR_SystemZ_XPLINK64_RegMask;
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_RegMask
+ : CSR_SystemZ_XPLINK64_RegMask;
}
const uint32_t *
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 122504d4b44b3..2a4253e2deafa 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
#include "SystemZ.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
@@ -44,9 +45,9 @@ inline bool isHighReg(unsigned int Reg) {
/// It is abstract, all calling conventions must override and
/// define the pure virtual member function defined in this class.
class SystemZCallingConventionRegisters {
+
public:
- /// \returns the register that keeps the
- /// return function address.
+ /// \returns the register that keeps the return function address.
virtual int getReturnFunctionAddressRegister() = 0;
/// \returns the register that keeps the
@@ -82,6 +83,8 @@ class SystemZXPLINK64Registers : public SystemZCallingConventionRegisters {
int getFramePointerRegister() override final { return SystemZ::R8D; };
+ int getAddressOfCalleeRegister() { return SystemZ::R6D; };
+
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction *MF) const override final;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 0839ea0a60cb5..67c5b8eb09b69 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -96,6 +96,10 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo {
return SpecialRegisters.get();
}
+ template <class SR> SR &getSpecialRegisters() const {
+ return *static_cast<SR *>(getSpecialRegisters());
+ }
+
const TargetFrameLowering *getFrameLowering() const override {
return FrameLowering.get();
}
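The new templated accessor avoids a manual static_cast at each use site; a
minimal usage sketch, mirroring the calls added in SystemZFrameLowering.cpp
above:

    const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
    auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
    // R6D on XPLINK, per getAddressOfCalleeRegister() above:
    int EntryPointReg = Regs.getAddressOfCalleeRegister();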
diff --git a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
new file mode 100644
index 0000000000000..4934fee8410cc
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
@@ -0,0 +1,236 @@
+; Test the generated function prologs/epilogs under XPLINK64 on z/OS
+;
+; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck --check-prefixes=CHECK64,CHECK %s
+
+; Test prolog/epilog for non-XPLEAF.
+
+; Small stack frame.
+; CHECK-LABEL: func0
+; CHECK64: stmg 6, 7
+define void @func0() {
+ call i64 (i64) @fun(i64 10)
+ ret void
+}
+
+; Spill all GPR CSRs
+; CHECK-LABEL: func1
+; CHECK64: stmg 6, 15
+define void @func1(i64 *%ptr) {
+ %l01 = load volatile i64, i64 *%ptr
+ %l02 = load volatile i64, i64 *%ptr
+ %l03 = load volatile i64, i64 *%ptr
+ %l04 = load volatile i64, i64 *%ptr
+ %l05 = load volatile i64, i64 *%ptr
+ %l06 = load volatile i64, i64 *%ptr
+ %l07 = load volatile i64, i64 *%ptr
+ %l08 = load volatile i64, i64 *%ptr
+ %l09 = load volatile i64, i64 *%ptr
+ %l10 = load volatile i64, i64 *%ptr
+ %l11 = load volatile i64, i64 *%ptr
+ %l12 = load volatile i64, i64 *%ptr
+ %l13 = load volatile i64, i64 *%ptr
+ %l14 = load volatile i64, i64 *%ptr
+ %l15 = load volatile i64, i64 *%ptr
+ %add01 = add i64 %l01, %l01
+ %add02 = add i64 %l02, %add01
+ %add03 = add i64 %l03, %add02
+ %add04 = add i64 %l04, %add03
+ %add05 = add i64 %l05, %add04
+ %add06 = add i64 %l06, %add05
+ %add07 = add i64 %l07, %add06
+ %add08 = add i64 %l08, %add07
+ %add09 = add i64 %l09, %add08
+ %add10 = add i64 %l10, %add09
+ %add11 = add i64 %l11, %add10
+ %add12 = add i64 %l12, %add11
+ %add13 = add i64 %l13, %add12
+ %add14 = add i64 %l14, %add13
+ %add15 = add i64 %l15, %add14
+ store volatile i64 %add01, i64 *%ptr
+ store volatile i64 %add02, i64 *%ptr
+ store volatile i64 %add03, i64 *%ptr
+ store volatile i64 %add04, i64 *%ptr
+ store volatile i64 %add05, i64 *%ptr
+ store volatile i64 %add06, i64 *%ptr
+ store volatile i64 %add07, i64 *%ptr
+ store volatile i64 %add08, i64 *%ptr
+ store volatile i64 %add09, i64 *%ptr
+ store volatile i64 %add10, i64 *%ptr
+ store volatile i64 %add11, i64 *%ptr
+ store volatile i64 %add12, i64 *%ptr
+ store volatile i64 %add13, i64 *%ptr
+ store volatile i64 %add14, i64 *%ptr
+ store volatile i64 %add15, i64 *%ptr
+ ret void
+}
+
+
+; Spill all FPRs and VRs
+; CHECK-LABEL: func2
+; CHECK64: std 15, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 14, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 13, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 12, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 11, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 10, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 9, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: std 8, {{[0-9]+}}(4) * 8-byte Folded Spill
+; CHECK64: vst 23, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 22, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 21, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 20, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 19, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 18, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 17, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+; CHECK64: vst 16, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+define void @func2(double *%ptr, <2 x i64> *%vec_ptr) {
+ %l00 = load volatile double, double *%ptr
+ %l01 = load volatile double, double *%ptr
+ %l02 = load volatile double, double *%ptr
+ %l03 = load volatile double, double *%ptr
+ %l04 = load volatile double, double *%ptr
+ %l05 = load volatile double, double *%ptr
+ %l06 = load volatile double, double *%ptr
+ %l07 = load volatile double, double *%ptr
+ %l08 = load volatile double, double *%ptr
+ %l09 = load volatile double, double *%ptr
+ %l10 = load volatile double, double *%ptr
+ %l11 = load volatile double, double *%ptr
+ %l12 = load volatile double, double *%ptr
+ %l13 = load volatile double, double *%ptr
+ %l14 = load volatile double, double *%ptr
+ %l15 = load volatile double, double *%ptr
+ %add00 = fadd double %l01, %l00
+ %add01 = fadd double %l01, %add00
+ %add02 = fadd double %l02, %add01
+ %add03 = fadd double %l03, %add02
+ %add04 = fadd double %l04, %add03
+ %add05 = fadd double %l05, %add04
+ %add06 = fadd double %l06, %add05
+ %add07 = fadd double %l07, %add06
+ %add08 = fadd double %l08, %add07
+ %add09 = fadd double %l09, %add08
+ %add10 = fadd double %l10, %add09
+ %add11 = fadd double %l11, %add10
+ %add12 = fadd double %l12, %add11
+ %add13 = fadd double %l13, %add12
+ %add14 = fadd double %l14, %add13
+ %add15 = fadd double %l15, %add14
+ store volatile double %add00, double *%ptr
+ store volatile double %add01, double *%ptr
+ store volatile double %add02, double *%ptr
+ store volatile double %add03, double *%ptr
+ store volatile double %add04, double *%ptr
+ store volatile double %add05, double *%ptr
+ store volatile double %add06, double *%ptr
+ store volatile double %add07, double *%ptr
+ store volatile double %add08, double *%ptr
+ store volatile double %add09, double *%ptr
+ store volatile double %add10, double *%ptr
+ store volatile double %add11, double *%ptr
+ store volatile double %add12, double *%ptr
+ store volatile double %add13, double *%ptr
+ store volatile double %add14, double *%ptr
+ store volatile double %add15, double *%ptr
+
+ %v00 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v01 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v02 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v03 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v04 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v05 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v06 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v07 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v08 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v09 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v10 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v11 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v12 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v13 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v14 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v15 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v16 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v17 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v18 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v19 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v20 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v21 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v22 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v23 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v24 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v25 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v26 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v27 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v28 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v29 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v30 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %v31 = load volatile <2 x i64>, <2 x i64> *%vec_ptr
+ %vadd00 = add <2 x i64> %v00, %v00
+ %vadd01 = add <2 x i64> %v01, %vadd00
+ %vadd02 = add <2 x i64> %v02, %vadd01
+ %vadd03 = add <2 x i64> %v03, %vadd02
+ %vadd04 = add <2 x i64> %v04, %vadd03
+ %vadd05 = add <2 x i64> %v05, %vadd04
+ %vadd06 = add <2 x i64> %v06, %vadd05
+ %vadd07 = add <2 x i64> %v07, %vadd06
+ %vadd08 = add <2 x i64> %v08, %vadd07
+ %vadd09 = add <2 x i64> %v09, %vadd08
+ %vadd10 = add <2 x i64> %v10, %vadd09
+ %vadd11 = add <2 x i64> %v11, %vadd10
+ %vadd12 = add <2 x i64> %v12, %vadd11
+ %vadd13 = add <2 x i64> %v13, %vadd12
+ %vadd14 = add <2 x i64> %v14, %vadd13
+ %vadd15 = add <2 x i64> %v15, %vadd14
+ %vadd16 = add <2 x i64> %v16, %vadd15
+ %vadd17 = add <2 x i64> %v17, %vadd16
+ %vadd18 = add <2 x i64> %v18, %vadd17
+ %vadd19 = add <2 x i64> %v19, %vadd18
+ %vadd20 = add <2 x i64> %v20, %vadd19
+ %vadd21 = add <2 x i64> %v21, %vadd20
+ %vadd22 = add <2 x i64> %v22, %vadd21
+ %vadd23 = add <2 x i64> %v23, %vadd22
+ %vadd24 = add <2 x i64> %v24, %vadd23
+ %vadd25 = add <2 x i64> %v25, %vadd24
+ %vadd26 = add <2 x i64> %v26, %vadd25
+ %vadd27 = add <2 x i64> %v27, %vadd26
+ %vadd28 = add <2 x i64> %v28, %vadd27
+ %vadd29 = add <2 x i64> %v29, %vadd28
+ %vadd30 = add <2 x i64> %v30, %vadd29
+ %vadd31 = add <2 x i64> %v31, %vadd30
+ store volatile <2 x i64> %vadd00, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd01, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd02, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd03, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd04, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd05, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd06, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd07, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd08, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd09, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd10, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd11, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd12, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd13, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd14, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd15, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd16, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd17, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd18, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd19, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd20, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd21, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd22, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd23, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd24, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd25, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd26, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd27, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd28, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd29, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd30, <2 x i64> *%vec_ptr
+ store volatile <2 x i64> %vadd31, <2 x i64> *%vec_ptr
+ ret void
+}
+
+declare i64 @fun(i64 %arg0)
+