[llvm] 0b252da - [SystemZ] Handle IR struct arguments correctly. (#169583)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 11:14:35 PST 2025
Author: Jonas Paulsson
Date: 2025-12-04T13:14:31-06:00
New Revision: 0b252daf64153f57203f19aa9ea13a3d72058b19
URL: https://github.com/llvm/llvm-project/commit/0b252daf64153f57203f19aa9ea13a3d72058b19
DIFF: https://github.com/llvm/llvm-project/commit/0b252daf64153f57203f19aa9ea13a3d72058b19.diff
LOG: [SystemZ] Handle IR struct arguments correctly. (#169583)
- The size of the stack slot was previously computed in LowerCall() by using
the original type, but that didn't work for a struct. Compute the size
by looking at the VT of each part and the number of them instead.
- All the members of a struct have the same OrigArgIndex, so it doesn't work
to assume that following parts belong to a split argument until another
OrigArgIndex is encountered. Use the isSplit() and isSplitEnd() flags
instead.
- Detect any scalar integer argument >64 bits in CanLowerReturn() instead of
just i128, in order to let all of them be passed on stack.
Fixes #168460
Added:
llvm/test/CodeGen/SystemZ/args-22.ll
Modified:
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index eb93024bed35c..2511d08a6d0ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1970,6 +1970,28 @@ SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
return SDValue();
}
+// The first part of a split stack argument is at index I in Args (and
+// ArgLocs). Return the type of a part and the number of them by reference.
+template <class ArgTy>
+static bool analyzeArgSplit(const SmallVectorImpl<ArgTy> &Args,
+ SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
+ MVT &PartVT, unsigned &NumParts) {
+ if (!Args[I].Flags.isSplit())
+ return false;
+ assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
+ "ArgLocs havoc.");
+ PartVT = ArgLocs[I].getValVT();
+ NumParts = 1;
+ for (unsigned PartIdx = I + 1;; ++PartIdx) {
+ assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
+ assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
+ ++NumParts;
+ if (Args[PartIdx].Flags.isSplitEnd())
+ break;
+ }
+ return true;
+}
+
SDValue SystemZTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
@@ -2074,16 +2096,26 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
MachinePointerInfo()));
// If the original argument was split (e.g. i128), we need
// to load all parts of it here (using the same address).
- unsigned ArgIndex = Ins[I].OrigArgIndex;
- assert (Ins[I].PartOffset == 0);
- while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
- CCValAssign &PartVA = ArgLocs[I + 1];
- unsigned PartOffset = Ins[I + 1].PartOffset;
- SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
- DAG.getIntPtrConstant(PartOffset, DL));
- InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
- MachinePointerInfo()));
- ++I;
+ MVT PartVT;
+ unsigned NumParts;
+ if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
+ // TODO: It is strange that while LowerCallTo() sets the PartOffset
+ // relative to the first split part LowerArguments() sets the offset
+ // from the beginning of the struct. So with {i32, i256}, the
+ // PartOffset for the i256 parts are differently handled. Try to
+ // remove that difference and use PartOffset directly here (instead
+ // of SplitBaseOffs).
+ unsigned SplitBaseOffs = Ins[I].PartOffset;
+ for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
+ ++I;
+ CCValAssign &PartVA = ArgLocs[I];
+ unsigned PartOffset = Ins[I].PartOffset - SplitBaseOffs;
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+ DAG.getIntPtrConstant(PartOffset, DL));
+ InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
+ MachinePointerInfo()));
+ assert(PartOffset && "Offset should be non-zero.");
+ }
}
} else
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
@@ -2319,18 +2351,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
- unsigned ArgIndex = Outs[I].OrigArgIndex;
EVT SlotVT;
- if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
- // Allocate the full stack space for a promoted (and split) argument.
- Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
- EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
- MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
- unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
- SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
- } else {
+ MVT PartVT;
+ unsigned NumParts = 1;
+ if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
+ SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
+ else
SlotVT = Outs[I].VT;
- }
SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
MemOpChains.push_back(
@@ -2338,18 +2365,19 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachinePointerInfo::getFixedStack(MF, FI)));
// If the original argument was split (e.g. i128), we need
// to store all parts of it here (and pass just one address).
- assert (Outs[I].PartOffset == 0);
- while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
- SDValue PartValue = OutVals[I + 1];
- unsigned PartOffset = Outs[I + 1].PartOffset;
+ assert(Outs[I].PartOffset == 0);
+ for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
+ ++I;
+ SDValue PartValue = OutVals[I];
+ unsigned PartOffset = Outs[I].PartOffset;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
DAG.getIntPtrConstant(PartOffset, DL));
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
+ assert(PartOffset && "Offset should be non-zero.");
assert((PartOffset + PartValue.getValueType().getStoreSize() <=
SlotVT.getStoreSize()) && "Not enough space for argument part!");
- ++I;
}
ArgValue = SpillSlot;
} else
@@ -2534,7 +2562,7 @@ bool SystemZTargetLowering::CanLowerReturn(
// Special case that we cannot easily detect in RetCC_SystemZ since
// i128 may not be a legal type.
for (auto &Out : Outs)
- if (Out.ArgVT == MVT::i128)
+ if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
return false;
SmallVector<CCValAssign, 16> RetLocs;
diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
new file mode 100644
index 0000000000000..ba422b65fc299
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -0,0 +1,1004 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
+;
+; Test passing IR struct arguments, which do not adhere to the SystemZ ABI but are
+; split up with each element passed like a separate argument.
+
+@Fnptr = external global ptr
+@Src = external global ptr
+@Dst = external global ptr
+
+%Ty0 = type {i128}
+define void @arg0(%Ty0 %A) {
+; CHECK-LABEL: arg0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r2)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stg %r0, 8(%r1)
+; CHECK-NEXT: stg %r2, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg0:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vst %v0, 0(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty0 %A, ptr @Dst
+ ret void
+}
+
+define void @call0() {
+; CHECK-LABEL: call0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 8(%r1)
+; CHECK-NEXT: lg %r1, 0(%r1)
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: stg %r1, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call0:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -176
+; VECTOR-NEXT: .cfi_def_cfa_offset 336
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v0, 0(%r1), 3
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: vst %v0, 160(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 288(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty0, ptr @Src
+ call void @Fnptr(%Ty0 %L)
+ ret void
+}
+
+define %Ty0 @ret0() {
+; CHECK-LABEL: ret0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 168(%r15)
+; CHECK-NEXT: lg %r1, 160(%r15)
+; CHECK-NEXT: stg %r0, 8(%r13)
+; CHECK-NEXT: stg %r1, 0(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 280(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: ret0:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -176
+; VECTOR-NEXT: .cfi_def_cfa_offset 336
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vl %v0, 160(%r15), 3
+; VECTOR-NEXT: vst %v0, 0(%r13), 3
+; VECTOR-NEXT: lmg %r13, %r15, 280(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty0 @Fnptr()
+ ret %Ty0 %C
+}
+
+%Ty1 = type {i72}
+define void @arg1(%Ty1 %A) {
+; CHECK-LABEL: arg1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r2)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stc %r0, 8(%r1)
+; CHECK-NEXT: sllg %r2, %r2, 56
+; CHECK-NEXT: rosbg %r2, %r0, 8, 63, 56
+; CHECK-NEXT: stg %r2, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg1:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vrepib %v1, 8
+; VECTOR-NEXT: vsteb %v0, 8(%r1), 15
+; VECTOR-NEXT: vsrlb %v0, %v0, %v1
+; VECTOR-NEXT: vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT: br %r14
+ store %Ty1 %A, ptr @Dst
+ ret void
+}
+
+define void @call1() {
+; CHECK-LABEL: call1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 0(%r1)
+; CHECK-NEXT: sllg %r2, %r0, 8
+; CHECK-NEXT: ic %r2, 8(%r1)
+; CHECK-NEXT: srlg %r0, %r0, 56
+; CHECK-NEXT: stg %r2, 168(%r15)
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: stg %r0, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call1:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -176
+; VECTOR-NEXT: .cfi_def_cfa_offset 336
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 8(%r1), 15
+; VECTOR-NEXT: vlrepg %v1, 0(%r1)
+; VECTOR-NEXT: vrepib %v2, 8
+; VECTOR-NEXT: vslb %v1, %v1, %v2
+; VECTOR-NEXT: vo %v0, %v0, %v1
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: vst %v0, 160(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 288(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty1, ptr @Src
+ call void @Fnptr(%Ty1 %L)
+ ret void
+}
+
+define %Ty1 @ret1() {
+; CHECK-LABEL: ret1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 160(%r15)
+; CHECK-NEXT: llgc %r1, 168(%r15)
+; CHECK-NEXT: stg %r0, 0(%r13)
+; CHECK-NEXT: stc %r1, 8(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 280(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: ret1:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -176
+; VECTOR-NEXT: .cfi_def_cfa_offset 336
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 168(%r15), 15
+; VECTOR-NEXT: vlrepg %v1, 160(%r15)
+; VECTOR-NEXT: vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT: vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT: lmg %r13, %r15, 280(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty1 @Fnptr()
+ ret %Ty1 %C
+}
+
+%Ty2 = type {i128, i128}
+define void @arg2(%Ty2 %A) {
+; CHECK-LABEL: arg2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r3)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: lg %r4, 8(%r2)
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stg %r0, 24(%r1)
+; CHECK-NEXT: stg %r3, 16(%r1)
+; CHECK-NEXT: stg %r4, 8(%r1)
+; CHECK-NEXT: stg %r2, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg2:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: vl %v1, 0(%r3), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vst %v1, 16(%r1), 3
+; VECTOR-NEXT: vst %v0, 0(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty2 %A, ptr @Dst
+ ret void
+}
+
+define void @call2() {
+; CHECK-LABEL: call2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 24(%r1)
+; CHECK-NEXT: lg %r2, 16(%r1)
+; CHECK-NEXT: lg %r3, 8(%r1)
+; CHECK-NEXT: lg %r1, 0(%r1)
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: stg %r2, 160(%r15)
+; CHECK-NEXT: stg %r3, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r1, 176(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call2:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v0, 0(%r1), 3
+; VECTOR-NEXT: vl %v1, 16(%r1), 3
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v1, 160(%r15), 3
+; VECTOR-NEXT: vst %v0, 176(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty2, ptr @Src
+ call void @Fnptr(%Ty2 %L)
+ ret void
+}
+
+define %Ty2 @ret2() {
+; CHECK-LABEL: ret2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: lg %r1, 184(%r15)
+; CHECK-NEXT: lg %r2, 160(%r15)
+; CHECK-NEXT: lg %r3, 168(%r15)
+; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: stg %r1, 24(%r13)
+; CHECK-NEXT: stg %r2, 0(%r13)
+; CHECK-NEXT: stg %r3, 8(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: ret2:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vl %v0, 160(%r15), 3
+; VECTOR-NEXT: vl %v1, 176(%r15), 3
+; VECTOR-NEXT: vst %v1, 16(%r13), 3
+; VECTOR-NEXT: vst %v0, 0(%r13), 3
+; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty2 @Fnptr()
+ ret %Ty2 %C
+}
+
+%Ty3 = type {i72, i128}
+define void @arg3(%Ty3 %A) {
+; CHECK-LABEL: arg3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r3)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: lg %r4, 8(%r2)
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stg %r0, 24(%r1)
+; CHECK-NEXT: stg %r3, 16(%r1)
+; CHECK-NEXT: stc %r4, 8(%r1)
+; CHECK-NEXT: sllg %r0, %r2, 56
+; CHECK-NEXT: rosbg %r0, %r4, 8, 63, 56
+; CHECK-NEXT: stg %r0, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg3:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r3), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vl %v1, 0(%r2), 3
+; VECTOR-NEXT: vsteb %v1, 8(%r1), 15
+; VECTOR-NEXT: vst %v0, 16(%r1), 3
+; VECTOR-NEXT: vrepib %v0, 8
+; VECTOR-NEXT: vsrlb %v0, %v1, %v0
+; VECTOR-NEXT: vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT: br %r14
+ store %Ty3 %A, ptr @Dst
+ ret void
+}
+
+define void @call3() {
+; CHECK-LABEL: call3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 0(%r1)
+; CHECK-NEXT: sllg %r2, %r0, 8
+; CHECK-NEXT: lg %r3, 24(%r1)
+; CHECK-NEXT: lg %r4, 16(%r1)
+; CHECK-NEXT: ic %r2, 8(%r1)
+; CHECK-NEXT: srlg %r0, %r0, 56
+; CHECK-NEXT: stg %r3, 168(%r15)
+; CHECK-NEXT: stg %r4, 160(%r15)
+; CHECK-NEXT: stg %r2, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r0, 176(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call3:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 8(%r1), 15
+; VECTOR-NEXT: vlrepg %v1, 0(%r1)
+; VECTOR-NEXT: vrepib %v2, 8
+; VECTOR-NEXT: vslb %v1, %v1, %v2
+; VECTOR-NEXT: vo %v0, %v0, %v1
+; VECTOR-NEXT: vl %v1, 16(%r1), 3
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v1, 160(%r15), 3
+; VECTOR-NEXT: vst %v0, 176(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty3, ptr @Src
+ call void @Fnptr(%Ty3 %L)
+ ret void
+}
+
+define %Ty3 @ret3() {
+; CHECK-LABEL: ret3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: lg %r1, 184(%r15)
+; CHECK-NEXT: lg %r2, 160(%r15)
+; CHECK-NEXT: llgc %r3, 168(%r15)
+; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: stg %r1, 24(%r13)
+; CHECK-NEXT: stg %r2, 0(%r13)
+; CHECK-NEXT: stc %r3, 8(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: ret3:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 168(%r15), 15
+; VECTOR-NEXT: vlrepg %v1, 160(%r15)
+; VECTOR-NEXT: vl %v2, 176(%r15), 3
+; VECTOR-NEXT: vst %v2, 16(%r13), 3
+; VECTOR-NEXT: vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT: vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty3 @Fnptr()
+ ret %Ty3 %C
+}
+
+%Ty4 = type {float, i8, i16, i32, i64, i128, i8}
+define void @arg4(%Ty4 %A) {
+; CHECK-LABEL: arg4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: l %r0, 164(%r15)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r14, 0(%r6)
+; CHECK-NEXT: lg %r13, 8(%r6)
+; CHECK-NEXT: stc %r0, 40(%r1)
+; CHECK-NEXT: stg %r5, 16(%r1)
+; CHECK-NEXT: st %r4, 8(%r1)
+; CHECK-NEXT: sth %r3, 6(%r1)
+; CHECK-NEXT: stc %r2, 4(%r1)
+; CHECK-NEXT: ste %f0, 0(%r1)
+; CHECK-NEXT: stg %r13, 32(%r1)
+; CHECK-NEXT: stg %r14, 24(%r1)
+; CHECK-NEXT: lmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg4:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v1, 0(%r6), 3
+; VECTOR-NEXT: l %r0, 164(%r15)
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: stc %r0, 40(%r1)
+; VECTOR-NEXT: stg %r5, 16(%r1)
+; VECTOR-NEXT: st %r4, 8(%r1)
+; VECTOR-NEXT: sth %r3, 6(%r1)
+; VECTOR-NEXT: stc %r2, 4(%r1)
+; VECTOR-NEXT: ste %f0, 0(%r1)
+; VECTOR-NEXT: vst %v1, 24(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty4 %A, ptr @Dst
+ ret void
+}
+
+define void @call4() {
+; CHECK-LABEL: call4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r6, %r15, 48(%r15)
+; CHECK-NEXT: .cfi_offset %r6, -112
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -184
+; CHECK-NEXT: .cfi_def_cfa_offset 344
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r5, 16(%r1)
+; CHECK-NEXT: l %r4, 8(%r1)
+; CHECK-NEXT: le %f0, 0(%r1)
+; CHECK-NEXT: lg %r0, 24(%r1)
+; CHECK-NEXT: lb %r14, 40(%r1)
+; CHECK-NEXT: lg %r13, 32(%r1)
+; CHECK-NEXT: lh %r3, 6(%r1)
+; CHECK-NEXT: lb %r2, 4(%r1)
+; CHECK-NEXT: st %r14, 164(%r15)
+; CHECK-NEXT: stg %r13, 176(%r15)
+; CHECK-NEXT: la %r6, 168(%r15)
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r6, %r15, 232(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call4:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r6, %r15, 48(%r15)
+; VECTOR-NEXT: .cfi_offset %r6, -112
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -184
+; VECTOR-NEXT: .cfi_def_cfa_offset 344
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: lh %r3, 6(%r1)
+; VECTOR-NEXT: lb %r2, 4(%r1)
+; VECTOR-NEXT: lb %r0, 40(%r1)
+; VECTOR-NEXT: lg %r5, 16(%r1)
+; VECTOR-NEXT: l %r4, 8(%r1)
+; VECTOR-NEXT: lde %f0, 0(%r1)
+; VECTOR-NEXT: vl %v1, 24(%r1), 3
+; VECTOR-NEXT: la %r6, 168(%r15)
+; VECTOR-NEXT: st %r0, 164(%r15)
+; VECTOR-NEXT: vst %v1, 168(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r6, %r15, 232(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty4, ptr @Src
+ call void @Fnptr(%Ty4 %L)
+ ret void
+}
+
+define %Ty4 @ret4() {
+; CHECK-LABEL: ret4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -208
+; CHECK-NEXT: .cfi_def_cfa_offset 368
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lb %r0, 164(%r15)
+; CHECK-NEXT: lh %r1, 166(%r15)
+; CHECK-NEXT: lg %r2, 192(%r15)
+; CHECK-NEXT: lg %r3, 184(%r15)
+; CHECK-NEXT: le %f0, 160(%r15)
+; CHECK-NEXT: l %r4, 168(%r15)
+; CHECK-NEXT: lg %r5, 176(%r15)
+; CHECK-NEXT: lb %r14, 200(%r15)
+; CHECK-NEXT: ste %f0, 0(%r13)
+; CHECK-NEXT: st %r4, 8(%r13)
+; CHECK-NEXT: stg %r5, 16(%r13)
+; CHECK-NEXT: stc %r14, 40(%r13)
+; CHECK-NEXT: stg %r3, 24(%r13)
+; CHECK-NEXT: stg %r2, 32(%r13)
+; CHECK-NEXT: sth %r1, 6(%r13)
+; CHECK-NEXT: stc %r0, 4(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 312(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: ret4:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -208
+; VECTOR-NEXT: .cfi_def_cfa_offset 368
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lb %r0, 164(%r15)
+; VECTOR-NEXT: lh %r1, 166(%r15)
+; VECTOR-NEXT: lb %r4, 200(%r15)
+; VECTOR-NEXT: lde %f0, 160(%r15)
+; VECTOR-NEXT: l %r2, 168(%r15)
+; VECTOR-NEXT: lg %r3, 176(%r15)
+; VECTOR-NEXT: vl %v1, 184(%r15), 3
+; VECTOR-NEXT: stc %r4, 40(%r13)
+; VECTOR-NEXT: vst %v1, 24(%r13), 3
+; VECTOR-NEXT: stg %r3, 16(%r13)
+; VECTOR-NEXT: st %r2, 8(%r13)
+; VECTOR-NEXT: sth %r1, 6(%r13)
+; VECTOR-NEXT: stc %r0, 4(%r13)
+; VECTOR-NEXT: ste %f0, 0(%r13)
+; VECTOR-NEXT: lmg %r13, %r15, 312(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty4 @Fnptr()
+ ret %Ty4 %C
+}
+
+%Ty5 = type [4 x i128]
+define void @arg5(%Ty5 %A) {
+; CHECK-LABEL: arg5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: .cfi_offset %r12, -64
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: lg %r0, 0(%r2)
+; CHECK-NEXT: lg %r1, 8(%r2)
+; CHECK-NEXT: lg %r2, 0(%r3)
+; CHECK-NEXT: lg %r3, 8(%r3)
+; CHECK-NEXT: lg %r14, 8(%r5)
+; CHECK-NEXT: lgrl %r13, Dst@GOT
+; CHECK-NEXT: lg %r5, 0(%r5)
+; CHECK-NEXT: lg %r12, 8(%r4)
+; CHECK-NEXT: lg %r4, 0(%r4)
+; CHECK-NEXT: stg %r14, 56(%r13)
+; CHECK-NEXT: stg %r5, 48(%r13)
+; CHECK-NEXT: stg %r12, 40(%r13)
+; CHECK-NEXT: stg %r4, 32(%r13)
+; CHECK-NEXT: stg %r3, 24(%r13)
+; CHECK-NEXT: stg %r2, 16(%r13)
+; CHECK-NEXT: stg %r1, 8(%r13)
+; CHECK-NEXT: stg %r0, 0(%r13)
+; CHECK-NEXT: lmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg5:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: vl %v1, 0(%r3), 3
+; VECTOR-NEXT: vl %v2, 0(%r4), 3
+; VECTOR-NEXT: vl %v3, 0(%r5), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vst %v3, 48(%r1), 3
+; VECTOR-NEXT: vst %v2, 32(%r1), 3
+; VECTOR-NEXT: vst %v1, 16(%r1), 3
+; VECTOR-NEXT: vst %v0, 0(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty5 %A, ptr @Dst
+ ret void
+}
+
+define void @call5() {
+; CHECK-LABEL: call5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -224
+; CHECK-NEXT: .cfi_def_cfa_offset 384
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 0(%r1)
+; CHECK-NEXT: lg %r2, 8(%r1)
+; CHECK-NEXT: lg %r3, 16(%r1)
+; CHECK-NEXT: lg %r4, 24(%r1)
+; CHECK-NEXT: lg %r5, 56(%r1)
+; CHECK-NEXT: lg %r14, 48(%r1)
+; CHECK-NEXT: lg %r13, 40(%r1)
+; CHECK-NEXT: lg %r1, 32(%r1)
+; CHECK-NEXT: stg %r5, 168(%r15)
+; CHECK-NEXT: stg %r14, 160(%r15)
+; CHECK-NEXT: stg %r13, 184(%r15)
+; CHECK-NEXT: stg %r1, 176(%r15)
+; CHECK-NEXT: stg %r4, 200(%r15)
+; CHECK-NEXT: stg %r3, 192(%r15)
+; CHECK-NEXT: stg %r2, 216(%r15)
+; CHECK-NEXT: la %r2, 208(%r15)
+; CHECK-NEXT: la %r3, 192(%r15)
+; CHECK-NEXT: la %r4, 176(%r15)
+; CHECK-NEXT: la %r5, 160(%r15)
+; CHECK-NEXT: stg %r0, 208(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call5:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -224
+; VECTOR-NEXT: .cfi_def_cfa_offset 384
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v0, 0(%r1), 3
+; VECTOR-NEXT: vl %v1, 16(%r1), 3
+; VECTOR-NEXT: vl %v2, 32(%r1), 3
+; VECTOR-NEXT: vl %v3, 48(%r1), 3
+; VECTOR-NEXT: la %r2, 208(%r15)
+; VECTOR-NEXT: la %r3, 192(%r15)
+; VECTOR-NEXT: la %r4, 176(%r15)
+; VECTOR-NEXT: la %r5, 160(%r15)
+; VECTOR-NEXT: vst %v3, 160(%r15), 3
+; VECTOR-NEXT: vst %v2, 176(%r15), 3
+; VECTOR-NEXT: vst %v1, 192(%r15), 3
+; VECTOR-NEXT: vst %v0, 208(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty5, ptr @Src
+ call void @Fnptr(%Ty5 %L)
+ ret void
+}
+
+define %Ty5 @ret5() {
+; CHECK-LABEL: ret5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: .cfi_offset %r12, -64
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -224
+; CHECK-NEXT: .cfi_def_cfa_offset 384
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 168(%r15)
+; CHECK-NEXT: lg %r1, 160(%r15)
+; CHECK-NEXT: lg %r2, 184(%r15)
+; CHECK-NEXT: lg %r3, 176(%r15)
+; CHECK-NEXT: lg %r4, 208(%r15)
+; CHECK-NEXT: lg %r5, 216(%r15)
+; CHECK-NEXT: lg %r14, 192(%r15)
+; CHECK-NEXT: lg %r12, 200(%r15)
+; CHECK-NEXT: stg %r4, 48(%r13)
+; CHECK-NEXT: stg %r5, 56(%r13)
+; CHECK-NEXT: stg %r14, 32(%r13)
+; CHECK-NEXT: stg %r12, 40(%r13)
+; CHECK-NEXT: stg %r3, 16(%r13)
+; CHECK-NEXT: stg %r2, 24(%r13)
+; CHECK-NEXT: stg %r1, 0(%r13)
+; CHECK-NEXT: stg %r0, 8(%r13)
+; CHECK-NEXT: lmg %r12, %r15, 320(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: ret5:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -224
+; VECTOR-NEXT: .cfi_def_cfa_offset 384
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vl %v0, 160(%r15), 3
+; VECTOR-NEXT: vl %v1, 176(%r15), 3
+; VECTOR-NEXT: vl %v2, 192(%r15), 3
+; VECTOR-NEXT: vl %v3, 208(%r15), 3
+; VECTOR-NEXT: vst %v3, 48(%r13), 3
+; VECTOR-NEXT: vst %v2, 32(%r13), 3
+; VECTOR-NEXT: vst %v1, 16(%r13), 3
+; VECTOR-NEXT: vst %v0, 0(%r13), 3
+; VECTOR-NEXT: lmg %r13, %r15, 328(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty5 @Fnptr()
+ ret %Ty5 %C
+}
+
+%Ty6 = type [2 x i72]
+define void @arg6(%Ty6 %A) {
+; CHECK-LABEL: arg6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r3)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r4, 8(%r2)
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stc %r0, 24(%r1)
+; CHECK-NEXT: stc %r4, 8(%r1)
+; CHECK-NEXT: sllg %r3, %r3, 56
+; CHECK-NEXT: rosbg %r3, %r0, 8, 63, 56
+; CHECK-NEXT: stg %r3, 16(%r1)
+; CHECK-NEXT: sllg %r0, %r2, 56
+; CHECK-NEXT: rosbg %r0, %r4, 8, 63, 56
+; CHECK-NEXT: stg %r0, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg6:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: vl %v1, 0(%r3), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vsteb %v1, 24(%r1), 15
+; VECTOR-NEXT: vrepib %v2, 8
+; VECTOR-NEXT: vsteb %v0, 8(%r1), 15
+; VECTOR-NEXT: vsrlb %v1, %v1, %v2
+; VECTOR-NEXT: vsrlb %v0, %v0, %v2
+; VECTOR-NEXT: vsteg %v1, 16(%r1), 1
+; VECTOR-NEXT: vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT: br %r14
+ store %Ty6 %A, ptr @Dst
+ ret void
+}
+
+define void @call6() {
+; CHECK-LABEL: call6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 0(%r1)
+; CHECK-NEXT: lg %r2, 16(%r1)
+; CHECK-NEXT: sllg %r3, %r0, 8
+; CHECK-NEXT: sllg %r4, %r2, 8
+; CHECK-NEXT: ic %r4, 24(%r1)
+; CHECK-NEXT: ic %r3, 8(%r1)
+; CHECK-NEXT: srlg %r0, %r0, 56
+; CHECK-NEXT: srlg %r1, %r2, 56
+; CHECK-NEXT: stg %r4, 168(%r15)
+; CHECK-NEXT: stg %r1, 160(%r15)
+; CHECK-NEXT: stg %r3, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r0, 176(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call6:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vgbm %v1, 0
+; VECTOR-NEXT: vleb %v1, 8(%r1), 15
+; VECTOR-NEXT: vlrepg %v2, 0(%r1)
+; VECTOR-NEXT: vrepib %v3, 8
+; VECTOR-NEXT: vslb %v2, %v2, %v3
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vo %v1, %v1, %v2
+; VECTOR-NEXT: vleb %v0, 24(%r1), 15
+; VECTOR-NEXT: vlrepg %v2, 16(%r1)
+; VECTOR-NEXT: vslb %v2, %v2, %v3
+; VECTOR-NEXT: vo %v0, %v0, %v2
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v0, 160(%r15), 3
+; VECTOR-NEXT: vst %v1, 176(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty6, ptr @Src
+ call void @Fnptr(%Ty6 %L)
+ ret void
+}
+
+define %Ty6 @ret6() {
+; CHECK-LABEL: ret6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: llgc %r1, 184(%r15)
+; CHECK-NEXT: lg %r2, 160(%r15)
+; CHECK-NEXT: llgc %r3, 168(%r15)
+; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: stc %r1, 24(%r13)
+; CHECK-NEXT: stg %r2, 0(%r13)
+; CHECK-NEXT: stc %r3, 8(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: ret6:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vgbm %v1, 0
+; VECTOR-NEXT: vleb %v1, 168(%r15), 15
+; VECTOR-NEXT: vleb %v0, 184(%r15), 15
+; VECTOR-NEXT: vlrepg %v2, 160(%r15)
+; VECTOR-NEXT: vlrepg %v3, 176(%r15)
+; VECTOR-NEXT: vsteg %v3, 16(%r13), 1
+; VECTOR-NEXT: vsteb %v0, 24(%r13), 15
+; VECTOR-NEXT: vsteg %v2, 0(%r13), 1
+; VECTOR-NEXT: vsteb %v1, 8(%r13), 15
+; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty6 @Fnptr()
+ ret %Ty6 %C
+}
+
+%Ty7 = type {i128}
+define void @arg7(%Ty7 %A, %Ty7 %B) {
+; CHECK-LABEL: arg7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r2)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: lg %r4, 8(%r3)
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: stg %r0, 8(%r1)
+; CHECK-NEXT: stg %r2, 0(%r1)
+; CHECK-NEXT: stg %r4, 24(%r1)
+; CHECK-NEXT: stg %r3, 16(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg7:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r3), 3
+; VECTOR-NEXT: vl %v1, 0(%r2), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vst %v1, 0(%r1), 3
+; VECTOR-NEXT: vst %v0, 16(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty7 %A, ptr @Dst
+ %D2 = getelementptr %Ty7, ptr @Dst, i32 1
+ store %Ty7 %B, ptr %D2
+ ret void
+}
+
+define void @call7() {
+; CHECK-LABEL: call7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 24(%r1)
+; CHECK-NEXT: lg %r2, 16(%r1)
+; CHECK-NEXT: lg %r3, 8(%r1)
+; CHECK-NEXT: lg %r1, 0(%r1)
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: stg %r2, 160(%r15)
+; CHECK-NEXT: stg %r3, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r1, 176(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call7:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v0, 0(%r1), 3
+; VECTOR-NEXT: vl %v1, 16(%r1), 3
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v1, 160(%r15), 3
+; VECTOR-NEXT: vst %v0, 176(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty7, ptr @Src
+ %S2 = getelementptr %Ty7, ptr @Src, i32 1
+ %L2 = load %Ty7, ptr %S2
+ call void @Fnptr(%Ty7 %L, %Ty7 %L2)
+ ret void
+}
More information about the llvm-commits
mailing list