[llvm] r226399 - [PowerPC] Initial PPC64 calling-convention changes for fastcc
Hal Finkel
hfinkel at anl.gov
Sun Jan 18 04:08:48 PST 2015
Author: hfinkel
Date: Sun Jan 18 06:08:47 2015
New Revision: 226399
URL: http://llvm.org/viewvc/llvm-project?rev=226399&view=rev
Log:
[PowerPC] Initial PPC64 calling-convention changes for fastcc
The default calling convention specified by the PPC64 ELF (V1 and V2) ABI is
designed to work with both prototyped and non-prototyped/varargs functions. As
a result, GPRs and stack space are allocated for every argument, even those
that are passed in floating-point or vector registers.
GlobalOpt::OptimizeFunctions will transform local non-varargs functions (that
do not have their address taken) to use the 'fast' calling convention.
When functions are using the 'fast' calling convention, don't allocate GPRs for
arguments passed in other types of registers, and don't allocate stack space for
arguments passed in registers. Other changes for the fast calling convention
may be added in the future.
Added:
llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll
llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
Modified: llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp?rev=226399&r1=226398&r2=226399&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp Sun Jan 18 06:08:47 2015
@@ -1275,7 +1275,7 @@ bool PPCFastISel::processCallArgs(SmallV
// Prepare to assign register arguments. Every argument uses up a
// GPR protocol register even if it's passed in a floating-point
- // register.
+ // register (unless we're using the fast calling convention).
unsigned NextGPR = PPC::X3;
unsigned NextFPR = PPC::F1;
@@ -1325,7 +1325,8 @@ bool PPCFastISel::processCallArgs(SmallV
unsigned ArgReg;
if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
ArgReg = NextFPR++;
- ++NextGPR;
+ if (CC != CallingConv::Fast)
+ ++NextGPR;
} else
ArgReg = NextGPR++;
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=226399&r1=226398&r2=226399&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Sun Jan 18 06:08:47 2015
@@ -2623,6 +2623,9 @@ PPCTargetLowering::LowerFormalArguments_
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
@@ -2674,7 +2677,7 @@ PPCTargetLowering::LowerFormalArguments_
// although the first ones are often in registers.
unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -2689,19 +2692,31 @@ PPCTargetLowering::LowerFormalArguments_
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
- /* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
- unsigned CurArgOffset = ArgOffset;
-
- /* Compute GPR index associated with argument offset. */
- GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
- GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention, when we need to make sure we do that only when
+ // we'll actually use a stack slot.
+ unsigned CurArgOffset, Align;
+ auto ComputeArgOffset = [&]() {
+ /* Respect alignment of argument on the stack. */
+ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ CurArgOffset = ArgOffset;
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputeArgOffset();
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ }
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -2745,7 +2760,7 @@ PPCTargetLowering::LowerFormalArguments_
InVals.push_back(Arg);
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store;
@@ -2807,7 +2822,7 @@ PPCTargetLowering::LowerFormalArguments_
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
@@ -2815,10 +2830,14 @@ PPCTargetLowering::LowerFormalArguments_
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
ArgSize = PtrByteSize;
}
- ArgOffset += 8;
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 8;
break;
case MVT::f32:
@@ -2838,11 +2857,11 @@ PPCTargetLowering::LowerFormalArguments_
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
- } else if (GPR_idx != Num_GPR_Regs) {
+ } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
// This can only ever happen in the presence of f32 array types,
// since otherwise we never run out of FPRs before running out
// of GPRs.
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::f32) {
@@ -2854,16 +2873,21 @@ PPCTargetLowering::LowerFormalArguments_
ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
- ArgOffset += ArgSize;
- if (Flags.isInConsecutiveRegsLast())
- ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ if (CallConv != CallingConv::Fast || needsLoad) {
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
break;
case MVT::v4f32:
case MVT::v4i32:
@@ -2881,9 +2905,13 @@ PPCTargetLowering::LowerFormalArguments_
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++VR_idx;
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
}
- ArgOffset += 16;
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 16;
break;
}
@@ -4270,6 +4298,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
// reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
@@ -4277,6 +4308,30 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
isELFv2ABI);
unsigned NumBytes = LinkageSize;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg *FPR = GetFPR();
+
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ // When using the fast calling convention, we don't provide backing for
+ // arguments that will be in registers.
+ unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
// Add up all the space actually used.
for (unsigned i = 0; i != NumOps; ++i) {
@@ -4284,6 +4339,35 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
+ if (CallConv == CallingConv::Fast) {
+ if (Flags.isByVal())
+ NumGPRsUsed += (Flags.getByValSize()+7)/8;
+ else
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (++NumGPRsUsed <= NumGPRs)
+ continue;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ break;
+ }
+ }
+
/* Respect alignment of argument on the stack. */
unsigned Align =
CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
@@ -4340,26 +4424,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
-
- static const MCPhysReg GPR[] = {
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10,
- };
- static const MCPhysReg *FPR = GetFPR();
-
- static const MCPhysReg VR[] = {
- PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
- PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
- };
- static const MCPhysReg VSRH[] = {
- PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
- PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
- };
-
- const unsigned NumGPRs = array_lengthof(GPR);
- const unsigned NumFPRs = 13;
- const unsigned NumVRs = array_lengthof(VR);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4371,22 +4435,31 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
- /* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
-
- /* Compute GPR index associated with argument offset. */
- GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
- GPR_idx = std::min(GPR_idx, NumGPRs);
-
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
- PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
-
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention, when we need to make sure we do that only when
+ // we'll actually use a stack slot.
+ auto ComputePtrOff = [&]() {
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputePtrOff();
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
+ }
// Promote integers to 64-bit values.
if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
@@ -4411,6 +4484,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
if (Size == 0)
continue;
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4419,7 +4495,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
MachinePointerInfo(), VT,
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
continue;
@@ -4481,7 +4557,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
@@ -4517,13 +4593,19 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += PtrByteSize;
}
- ArgOffset += PtrByteSize;
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += PtrByteSize;
break;
case MVT::f32:
case MVT::f64: {
@@ -4537,6 +4619,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
// then the parameter save area. For now, put all arguments to vararg
// routines always in both locations (FPR *and* GPR or stack slot).
bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+ bool NeededLoad = false;
// First load the argument into the next available FPR.
if (FPR_idx != NumFPRs)
@@ -4545,7 +4628,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
// Next, load the argument into GPR or stack slot if needed.
if (!NeedGPROrStack)
;
- else if (GPR_idx != NumGPRs) {
+ else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
// In the non-vararg case, this can only ever happen in the
// presence of f32 array types, since otherwise we never run
// out of FPRs before running out of GPRs.
@@ -4584,8 +4667,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
ArgVal = SDValue();
if (ArgVal.getNode())
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
if (Arg.getValueType() == MVT::f32 &&
@@ -4597,14 +4683,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
+
+ NeededLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- ArgOffset += (Arg.getValueType() == MVT::f32 &&
- Flags.isInConsecutiveRegs()) ? 4 : 8;
- if (Flags.isInConsecutiveRegsLast())
- ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ if (CallConv != CallingConv::Fast || NeededLoad) {
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
break;
}
case MVT::v4f32:
@@ -4663,11 +4753,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
RegsToPass.push_back(std::make_pair(VReg, Arg));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += 16;
}
- ArgOffset += 16;
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += 16;
break;
}
}
Added: llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll?rev=226399&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll Sun Jan 18 06:08:47 2015
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=pwr7 -mattr=-vsx -fast-isel -fast-isel-abort < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc i64 @g1(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+ ret i64 %g1
+
+; CHECK-LABEL: @g1
+; CHECK-NOT: mr 3,
+; CHECK: blr
+}
+
+define fastcc i64 @g2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+ ret i64 %g2
+
+; CHECK-LABEL: @g2
+; CHECK: mr 3, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g3(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+ ret i64 %g3
+
+; CHECK-LABEL: @g3
+; CHECK: mr 3, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+ ret double %f2
+
+; CHECK-LABEL: @f2
+; CHECK: fmr 1, 2
+; CHECK-NEXT: blr
+}
+
+define void @cg2(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, i64 %v, double 0.0, i64 0, double 0.0, i64 0, double 0.0)
+ ret void
+
+; CHECK-LABEL: @cg2
+; CHECK: mr 4, 3
+; CHECK: blr
+}
+
+define void @cf2(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, i64 0, double %v, i64 0, double 0.0, i64 0, double 0.0)
+ ret void
+
+; CHECK-LABEL: @cf2
+; CHECK: mr 2, 1
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
Added: llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc.ll?rev=226399&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-fastcc.ll Sun Jan 18 06:08:47 2015
@@ -0,0 +1,540 @@
+; RUN: llc -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc i64 @g1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g1
+
+; CHECK-LABEL: @g1
+; CHECK-NOT: mr 3,
+; CHECK: blr
+}
+
+define fastcc i64 @g2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g2
+
+; CHECK-LABEL: @g2
+; CHECK: mr 3, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g3
+
+; CHECK-LABEL: @g3
+; CHECK: mr 3, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g4
+
+; CHECK-LABEL: @g4
+; CHECK: mr 3, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g5
+
+; CHECK-LABEL: @g5
+; CHECK: mr 3, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g6
+
+; CHECK-LABEL: @g6
+; CHECK: mr 3, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g7
+
+; CHECK-LABEL: @g7
+; CHECK: mr 3, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g8
+
+; CHECK-LABEL: @g8
+; CHECK: mr 3, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g9
+
+; CHECK-LABEL: @g9
+; CHECK: ld 3, 48(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g10
+
+; CHECK-LABEL: @g10
+; CHECK: ld 3, 56(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g11
+
+; CHECK-LABEL: @g11
+; CHECK: ld 3, 64(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f1
+
+; CHECK-LABEL: @f1
+; CHECK-NOT: fmr 1,
+; CHECK: blr
+}
+
+define fastcc double @f2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f2
+
+; CHECK-LABEL: @f2
+; CHECK: fmr 1, 2
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f3
+
+; CHECK-LABEL: @f3
+; CHECK: fmr 1, 3
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f4
+
+; CHECK-LABEL: @f4
+; CHECK: fmr 1, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f5
+
+; CHECK-LABEL: @f5
+; CHECK: fmr 1, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f6
+
+; CHECK-LABEL: @f6
+; CHECK: fmr 1, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f7
+
+; CHECK-LABEL: @f7
+; CHECK: fmr 1, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f8
+
+; CHECK-LABEL: @f8
+; CHECK: fmr 1, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f9
+
+; CHECK-LABEL: @f9
+; CHECK: fmr 1, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f10
+
+; CHECK-LABEL: @f10
+; CHECK: fmr 1, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f11
+
+; CHECK-LABEL: @f11
+; CHECK: fmr 1, 11
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f12(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f12
+
+; CHECK-LABEL: @f12
+; CHECK: fmr 1, 12
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f13(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f13
+
+; CHECK-LABEL: @f13
+; CHECK: fmr 1, 13
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f14(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f14
+
+; CHECK-LABEL: @f14
+; CHECK: lfd 1, 120(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f15(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f15
+
+; CHECK-LABEL: @f15
+; CHECK: lfd 1, 152(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v1
+
+; CHECK-LABEL: @v1
+; CHECK-NOT: vor 2,
+; CHECK: blr
+}
+
+define fastcc <4 x i32> @v2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v2
+
+; CHECK-LABEL: @v2
+; CHECK: vor 2, 3, 3
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v3
+
+; CHECK-LABEL: @v3
+; CHECK: vor 2, 4, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v4
+
+; CHECK-LABEL: @v4
+; CHECK: vor 2, 5, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v5
+
+; CHECK-LABEL: @v5
+; CHECK: vor 2, 6, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v6
+
+; CHECK-LABEL: @v6
+; CHECK: vor 2, 7, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v7
+
+; CHECK-LABEL: @v7
+; CHECK: vor 2, 8, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v8
+
+; CHECK-LABEL: @v8
+; CHECK: vor 2, 9, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v9
+
+; CHECK-LABEL: @v9
+; CHECK: vor 2, 10, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v10
+
+; CHECK-LABEL: @v10
+; CHECK: vor 2, 11, 11
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v11
+
+; CHECK-LABEL: @v11
+; CHECK: vor 2, 12, 12
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v12(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v12
+
+; CHECK-LABEL: @v12
+; CHECK: vor 2, 13, 13
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v13(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v13
+
+; CHECK-LABEL: @v13
+; CHECK: addi [[REG1:[0-9]+]], 1, 96
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v14(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v14
+
+; CHECK-LABEL: @v14
+; CHECK: addi [[REG1:[0-9]+]], 1, 128
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v15(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v15
+
+; CHECK-LABEL: @v15
+; CHECK: addi [[REG1:[0-9]+]], 1, 160
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define void @cg1(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg1
+; CHECK-NOT: {{^[ \t]*}}mr 3,
+; CHECK: blr
+}
+
+define void @cg2(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg2
+; CHECK: mr 4, 3
+; CHECK: blr
+}
+
+define void @cg3(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg3
+; CHECK: mr 5, 3
+; CHECK: blr
+}
+
+define void @cg4(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg4
+; CHECK: mr 6, 3
+; CHECK: blr
+}
+
+define void @cg5(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg5
+; CHECK: mr 7, 3
+; CHECK: blr
+}
+
+define void @cg6(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg6
+; CHECK: mr 8, 3
+; CHECK: blr
+}
+
+define void @cg7(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg7
+; CHECK: mr 9, 3
+; CHECK: blr
+}
+
+define void @cg8(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg8
+; CHECK: mr 10, 3
+; CHECK: blr
+}
+
+define void @cg9(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg9
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 48(1)
+; CHECK: blr
+}
+
+define void @cg10(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg10
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 56(1)
+; CHECK: blr
+}
+
+define void @cg11(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg11
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 64(1)
+; CHECK: blr
+}
+
+define void @cf1(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf1
+; CHECK-NOT: fmr 1,
+; CHECK: blr
+}
+
+define void @cf2(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf2
+; CHECK: fmr 2, 1
+; CHECK: blr
+}
+
+define void @cf3(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf3
+; CHECK: fmr 3, 1
+; CHECK: blr
+}
+
+define void @cf4(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf4
+; CHECK: fmr 4, 1
+; CHECK: blr
+}
+
+define void @cf5(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf5
+; CHECK: fmr 5, 1
+; CHECK: blr
+}
+
+define void @cf14(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf14
+; CHECK: stfd 1, 120(1)
+; CHECK: blr
+}
+
+define void @cf15(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf15
+; CHECK: stfd 1, 152(1)
+; CHECK: blr
+}
+
+define void @cv2(<4 x i32> %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cv2
+; CHECK: vor 3, 2, 2
+; CHECK: blr
+}
+
+define void @cv3(<4 x i32> %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cv3
+; CHECK: vor 4, 2, 2
+; CHECK: blr
+}
+
+define void @cv13(<4 x i32> %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cv13
+; CHECK: li [[REG1:[0-9]+]], 96
+; CHECK: stvx 2, 1, [[REG1]]
+; CHECK: blr
+}
+
+define void @cv14(<4 x i32> %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cv14
+; CHECK: li [[REG1:[0-9]+]], 128
+; CHECK: stvx 2, 1, [[REG1]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
More information about the llvm-commits
mailing list