[llvm] 0b252da - [SystemZ] Handle IR struct arguments correctly. (#169583)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 4 11:14:35 PST 2025


Author: Jonas Paulsson
Date: 2025-12-04T13:14:31-06:00
New Revision: 0b252daf64153f57203f19aa9ea13a3d72058b19

URL: https://github.com/llvm/llvm-project/commit/0b252daf64153f57203f19aa9ea13a3d72058b19
DIFF: https://github.com/llvm/llvm-project/commit/0b252daf64153f57203f19aa9ea13a3d72058b19.diff

LOG: [SystemZ] Handle IR struct arguments correctly. (#169583)

- The size of the stack slot was previously computed in LowerCall() by using
  the original type, but that didn't work for a struct. Compute the size
  by looking at the VT of each part and the number of them instead.

- All the members of a struct have the same OrigArgIndex, so it doesn't work
  to assume that following parts belong to a split argument until another
  OrigArgIndex is encountered. Use the isSplit() and isSplitEnd() flags
  instead.

- Detect any scalar integer argument >64 bits in CanLowerReturn() instead of
  just i128, in order to let all of them be passed on stack.
  
Fixes #168460

Added: 
    llvm/test/CodeGen/SystemZ/args-22.ll

Modified: 
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index eb93024bed35c..2511d08a6d0ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1970,6 +1970,28 @@ SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
   return SDValue();
 }
 
+// The first part of a split stack argument is at index I in Args (and
+// ArgLocs). Return the type of a part and the number of them by reference.
+template <class ArgTy>
+static bool analyzeArgSplit(const SmallVectorImpl<ArgTy> &Args,
+                            SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
+                            MVT &PartVT, unsigned &NumParts) {
+  if (!Args[I].Flags.isSplit())
+    return false;
+  assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
+         "ArgLocs havoc.");
+  PartVT = ArgLocs[I].getValVT();
+  NumParts = 1;
+  for (unsigned PartIdx = I + 1;; ++PartIdx) {
+    assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
+    assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
+    ++NumParts;
+    if (Args[PartIdx].Flags.isSplitEnd())
+      break;
+  }
+  return true;
+}
+
 SDValue SystemZTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
@@ -2074,16 +2096,26 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
                                    MachinePointerInfo()));
       // If the original argument was split (e.g. i128), we need
       // to load all parts of it here (using the same address).
-      unsigned ArgIndex = Ins[I].OrigArgIndex;
-      assert (Ins[I].PartOffset == 0);
-      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
-        CCValAssign &PartVA = ArgLocs[I + 1];
-        unsigned PartOffset = Ins[I + 1].PartOffset;
-        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
-                                      DAG.getIntPtrConstant(PartOffset, DL));
-        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
-                                     MachinePointerInfo()));
-        ++I;
+      MVT PartVT;
+      unsigned NumParts;
+      if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
+        // TODO: It is strange that while LowerCallTo() sets the PartOffset
+        // relative to the first split part LowerArguments() sets the offset
+        // from the beginning of the struct. So with {i32, i256}, the
+        // PartOffset for the i256 parts are differently handled. Try to
+        // remove that difference and use PartOffset directly here (instead
+        // of SplitBaseOffs).
+        unsigned SplitBaseOffs = Ins[I].PartOffset;
+        for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
+          ++I;
+          CCValAssign &PartVA = ArgLocs[I];
+          unsigned PartOffset = Ins[I].PartOffset - SplitBaseOffs;
+          SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+                                        DAG.getIntPtrConstant(PartOffset, DL));
+          InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
+                                       MachinePointerInfo()));
+          assert(PartOffset && "Offset should be non-zero.");
+        }
       }
     } else
       InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
@@ -2319,18 +2351,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // Store the argument in a stack slot and pass its address.
-      unsigned ArgIndex = Outs[I].OrigArgIndex;
       EVT SlotVT;
-      if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
-        // Allocate the full stack space for a promoted (and split) argument.
-        Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
-        EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
-        MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
-        unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
-        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
-      } else {
+      MVT PartVT;
+      unsigned NumParts = 1;
+      if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
+        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
+      else
         SlotVT = Outs[I].VT;
-      }
       SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
       MemOpChains.push_back(
@@ -2338,18 +2365,19 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                        MachinePointerInfo::getFixedStack(MF, FI)));
       // If the original argument was split (e.g. i128), we need
       // to store all parts of it here (and pass just one address).
-      assert (Outs[I].PartOffset == 0);
-      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
-        SDValue PartValue = OutVals[I + 1];
-        unsigned PartOffset = Outs[I + 1].PartOffset;
+      assert(Outs[I].PartOffset == 0);
+      for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
+        ++I;
+        SDValue PartValue = OutVals[I];
+        unsigned PartOffset = Outs[I].PartOffset;
         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                       DAG.getIntPtrConstant(PartOffset, DL));
         MemOpChains.push_back(
             DAG.getStore(Chain, DL, PartValue, Address,
                          MachinePointerInfo::getFixedStack(MF, FI)));
+        assert(PartOffset && "Offset should be non-zero.");
         assert((PartOffset + PartValue.getValueType().getStoreSize() <=
                 SlotVT.getStoreSize()) && "Not enough space for argument part!");
-        ++I;
       }
       ArgValue = SpillSlot;
     } else
@@ -2534,7 +2562,7 @@ bool SystemZTargetLowering::CanLowerReturn(
   // Special case that we cannot easily detect in RetCC_SystemZ since
   // i128 may not be a legal type.
   for (auto &Out : Outs)
-    if (Out.ArgVT == MVT::i128)
+    if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
       return false;
 
   SmallVector<CCValAssign, 16> RetLocs;

diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
new file mode 100644
index 0000000000000..ba422b65fc299
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -0,0 +1,1004 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
+;
+; Test passing IR struct arguments, which do not adhere to the SystemZ ABI but are
+; split up with each element passed like a separate argument.
+
+@Fnptr = external global ptr
+@Src = external global ptr
+@Dst = external global ptr
+
+%Ty0 = type {i128}
+define void @arg0(%Ty0 %A) {
+; CHECK-LABEL: arg0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r2)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stg %r0, 8(%r1)
+; CHECK-NEXT:    stg %r2, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg0:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vst %v0, 0(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty0 %A, ptr @Dst
+  ret void
+}
+
+define void @call0() {
+; CHECK-LABEL: call0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r0, 8(%r1)
+; CHECK-NEXT:    lg %r1, 0(%r1)
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    stg %r1, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call0:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -176
+; VECTOR-NEXT:    .cfi_def_cfa_offset 336
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    vl %v0, 0(%r1), 3
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    vst %v0, 160(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty0, ptr @Src
+  call void @Fnptr(%Ty0 %L)
+  ret void
+}
+
+define %Ty0 @ret0() {
+; CHECK-LABEL: ret0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lg %r0, 168(%r15)
+; CHECK-NEXT:    lg %r1, 160(%r15)
+; CHECK-NEXT:    stg %r0, 8(%r13)
+; CHECK-NEXT:    stg %r1, 0(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 280(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: ret0:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -176
+; VECTOR-NEXT:    .cfi_def_cfa_offset 336
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    vl %v0, 160(%r15), 3
+; VECTOR-NEXT:    vst %v0, 0(%r13), 3
+; VECTOR-NEXT:    lmg %r13, %r15, 280(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty0 @Fnptr()
+  ret %Ty0 %C
+}
+
+%Ty1 = type {i72}
+define void @arg1(%Ty1 %A) {
+; CHECK-LABEL: arg1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r2)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stc %r0, 8(%r1)
+; CHECK-NEXT:    sllg %r2, %r2, 56
+; CHECK-NEXT:    rosbg %r2, %r0, 8, 63, 56
+; CHECK-NEXT:    stg %r2, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg1:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vrepib %v1, 8
+; VECTOR-NEXT:    vsteb %v0, 8(%r1), 15
+; VECTOR-NEXT:    vsrlb %v0, %v0, %v1
+; VECTOR-NEXT:    vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT:    br %r14
+  store %Ty1 %A, ptr @Dst
+  ret void
+}
+
+define void @call1() {
+; CHECK-LABEL: call1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r0, 0(%r1)
+; CHECK-NEXT:    sllg %r2, %r0, 8
+; CHECK-NEXT:    ic %r2, 8(%r1)
+; CHECK-NEXT:    srlg %r0, %r0, 56
+; CHECK-NEXT:    stg %r2, 168(%r15)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    stg %r0, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call1:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -176
+; VECTOR-NEXT:    .cfi_def_cfa_offset 336
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 8(%r1), 15
+; VECTOR-NEXT:    vlrepg %v1, 0(%r1)
+; VECTOR-NEXT:    vrepib %v2, 8
+; VECTOR-NEXT:    vslb %v1, %v1, %v2
+; VECTOR-NEXT:    vo %v0, %v0, %v1
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    vst %v0, 160(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty1, ptr @Src
+  call void @Fnptr(%Ty1 %L)
+  ret void
+}
+
+define %Ty1 @ret1() {
+; CHECK-LABEL: ret1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lg %r0, 160(%r15)
+; CHECK-NEXT:    llgc %r1, 168(%r15)
+; CHECK-NEXT:    stg %r0, 0(%r13)
+; CHECK-NEXT:    stc %r1, 8(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 280(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: ret1:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -176
+; VECTOR-NEXT:    .cfi_def_cfa_offset 336
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 168(%r15), 15
+; VECTOR-NEXT:    vlrepg %v1, 160(%r15)
+; VECTOR-NEXT:    vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT:    vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT:    lmg %r13, %r15, 280(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty1 @Fnptr()
+  ret %Ty1 %C
+}
+
+%Ty2 = type {i128, i128}
+define void @arg2(%Ty2 %A) {
+; CHECK-LABEL: arg2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r3)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    lg %r4, 8(%r2)
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stg %r0, 24(%r1)
+; CHECK-NEXT:    stg %r3, 16(%r1)
+; CHECK-NEXT:    stg %r4, 8(%r1)
+; CHECK-NEXT:    stg %r2, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg2:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    vl %v1, 0(%r3), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vst %v1, 16(%r1), 3
+; VECTOR-NEXT:    vst %v0, 0(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty2 %A, ptr @Dst
+  ret void
+}
+
+define void @call2() {
+; CHECK-LABEL: call2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r0, 24(%r1)
+; CHECK-NEXT:    lg %r2, 16(%r1)
+; CHECK-NEXT:    lg %r3, 8(%r1)
+; CHECK-NEXT:    lg %r1, 0(%r1)
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    stg %r2, 160(%r15)
+; CHECK-NEXT:    stg %r3, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r1, 176(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call2:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    vl %v0, 0(%r1), 3
+; VECTOR-NEXT:    vl %v1, 16(%r1), 3
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v1, 160(%r15), 3
+; VECTOR-NEXT:    vst %v0, 176(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty2, ptr @Src
+  call void @Fnptr(%Ty2 %L)
+  ret void
+}
+
+define %Ty2 @ret2() {
+; CHECK-LABEL: ret2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    lg %r1, 184(%r15)
+; CHECK-NEXT:    lg %r2, 160(%r15)
+; CHECK-NEXT:    lg %r3, 168(%r15)
+; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    stg %r1, 24(%r13)
+; CHECK-NEXT:    stg %r2, 0(%r13)
+; CHECK-NEXT:    stg %r3, 8(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: ret2:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    vl %v0, 160(%r15), 3
+; VECTOR-NEXT:    vl %v1, 176(%r15), 3
+; VECTOR-NEXT:    vst %v1, 16(%r13), 3
+; VECTOR-NEXT:    vst %v0, 0(%r13), 3
+; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty2 @Fnptr()
+  ret %Ty2 %C
+}
+
+%Ty3 = type {i72, i128}
+define void @arg3(%Ty3 %A) {
+; CHECK-LABEL: arg3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r3)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    lg %r4, 8(%r2)
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stg %r0, 24(%r1)
+; CHECK-NEXT:    stg %r3, 16(%r1)
+; CHECK-NEXT:    stc %r4, 8(%r1)
+; CHECK-NEXT:    sllg %r0, %r2, 56
+; CHECK-NEXT:    rosbg %r0, %r4, 8, 63, 56
+; CHECK-NEXT:    stg %r0, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg3:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r3), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vl %v1, 0(%r2), 3
+; VECTOR-NEXT:    vsteb %v1, 8(%r1), 15
+; VECTOR-NEXT:    vst %v0, 16(%r1), 3
+; VECTOR-NEXT:    vrepib %v0, 8
+; VECTOR-NEXT:    vsrlb %v0, %v1, %v0
+; VECTOR-NEXT:    vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT:    br %r14
+  store %Ty3 %A, ptr @Dst
+  ret void
+}
+
+define void @call3() {
+; CHECK-LABEL: call3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r0, 0(%r1)
+; CHECK-NEXT:    sllg %r2, %r0, 8
+; CHECK-NEXT:    lg %r3, 24(%r1)
+; CHECK-NEXT:    lg %r4, 16(%r1)
+; CHECK-NEXT:    ic %r2, 8(%r1)
+; CHECK-NEXT:    srlg %r0, %r0, 56
+; CHECK-NEXT:    stg %r3, 168(%r15)
+; CHECK-NEXT:    stg %r4, 160(%r15)
+; CHECK-NEXT:    stg %r2, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r0, 176(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call3:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 8(%r1), 15
+; VECTOR-NEXT:    vlrepg %v1, 0(%r1)
+; VECTOR-NEXT:    vrepib %v2, 8
+; VECTOR-NEXT:    vslb %v1, %v1, %v2
+; VECTOR-NEXT:    vo %v0, %v0, %v1
+; VECTOR-NEXT:    vl %v1, 16(%r1), 3
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v1, 160(%r15), 3
+; VECTOR-NEXT:    vst %v0, 176(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty3, ptr @Src
+  call void @Fnptr(%Ty3 %L)
+  ret void
+}
+
+define %Ty3 @ret3() {
+; CHECK-LABEL: ret3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    lg %r1, 184(%r15)
+; CHECK-NEXT:    lg %r2, 160(%r15)
+; CHECK-NEXT:    llgc %r3, 168(%r15)
+; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    stg %r1, 24(%r13)
+; CHECK-NEXT:    stg %r2, 0(%r13)
+; CHECK-NEXT:    stc %r3, 8(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: ret3:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 168(%r15), 15
+; VECTOR-NEXT:    vlrepg %v1, 160(%r15)
+; VECTOR-NEXT:    vl %v2, 176(%r15), 3
+; VECTOR-NEXT:    vst %v2, 16(%r13), 3
+; VECTOR-NEXT:    vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT:    vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty3 @Fnptr()
+  ret %Ty3 %C
+}
+
+%Ty4 = type {float, i8, i16, i32, i64, i128, i8}
+define void @arg4(%Ty4 %A) {
+; CHECK-LABEL: arg4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    l %r0, 164(%r15)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r14, 0(%r6)
+; CHECK-NEXT:    lg %r13, 8(%r6)
+; CHECK-NEXT:    stc %r0, 40(%r1)
+; CHECK-NEXT:    stg %r5, 16(%r1)
+; CHECK-NEXT:    st %r4, 8(%r1)
+; CHECK-NEXT:    sth %r3, 6(%r1)
+; CHECK-NEXT:    stc %r2, 4(%r1)
+; CHECK-NEXT:    ste %f0, 0(%r1)
+; CHECK-NEXT:    stg %r13, 32(%r1)
+; CHECK-NEXT:    stg %r14, 24(%r1)
+; CHECK-NEXT:    lmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg4:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v1, 0(%r6), 3
+; VECTOR-NEXT:    l %r0, 164(%r15)
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    stc %r0, 40(%r1)
+; VECTOR-NEXT:    stg %r5, 16(%r1)
+; VECTOR-NEXT:    st %r4, 8(%r1)
+; VECTOR-NEXT:    sth %r3, 6(%r1)
+; VECTOR-NEXT:    stc %r2, 4(%r1)
+; VECTOR-NEXT:    ste %f0, 0(%r1)
+; VECTOR-NEXT:    vst %v1, 24(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty4 %A, ptr @Dst
+  ret void
+}
+
+define void @call4() {
+; CHECK-LABEL: call4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
+; CHECK-NEXT:    .cfi_offset %r6, -112
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -184
+; CHECK-NEXT:    .cfi_def_cfa_offset 344
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r5, 16(%r1)
+; CHECK-NEXT:    l %r4, 8(%r1)
+; CHECK-NEXT:    le %f0, 0(%r1)
+; CHECK-NEXT:    lg %r0, 24(%r1)
+; CHECK-NEXT:    lb %r14, 40(%r1)
+; CHECK-NEXT:    lg %r13, 32(%r1)
+; CHECK-NEXT:    lh %r3, 6(%r1)
+; CHECK-NEXT:    lb %r2, 4(%r1)
+; CHECK-NEXT:    st %r14, 164(%r15)
+; CHECK-NEXT:    stg %r13, 176(%r15)
+; CHECK-NEXT:    la %r6, 168(%r15)
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r6, %r15, 232(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call4:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r6, %r15, 48(%r15)
+; VECTOR-NEXT:    .cfi_offset %r6, -112
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -184
+; VECTOR-NEXT:    .cfi_def_cfa_offset 344
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    lh %r3, 6(%r1)
+; VECTOR-NEXT:    lb %r2, 4(%r1)
+; VECTOR-NEXT:    lb %r0, 40(%r1)
+; VECTOR-NEXT:    lg %r5, 16(%r1)
+; VECTOR-NEXT:    l %r4, 8(%r1)
+; VECTOR-NEXT:    lde %f0, 0(%r1)
+; VECTOR-NEXT:    vl %v1, 24(%r1), 3
+; VECTOR-NEXT:    la %r6, 168(%r15)
+; VECTOR-NEXT:    st %r0, 164(%r15)
+; VECTOR-NEXT:    vst %v1, 168(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r6, %r15, 232(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty4, ptr @Src
+  call void @Fnptr(%Ty4 %L)
+  ret void
+}
+
+define %Ty4 @ret4() {
+; CHECK-LABEL: ret4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -208
+; CHECK-NEXT:    .cfi_def_cfa_offset 368
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lb %r0, 164(%r15)
+; CHECK-NEXT:    lh %r1, 166(%r15)
+; CHECK-NEXT:    lg %r2, 192(%r15)
+; CHECK-NEXT:    lg %r3, 184(%r15)
+; CHECK-NEXT:    le %f0, 160(%r15)
+; CHECK-NEXT:    l %r4, 168(%r15)
+; CHECK-NEXT:    lg %r5, 176(%r15)
+; CHECK-NEXT:    lb %r14, 200(%r15)
+; CHECK-NEXT:    ste %f0, 0(%r13)
+; CHECK-NEXT:    st %r4, 8(%r13)
+; CHECK-NEXT:    stg %r5, 16(%r13)
+; CHECK-NEXT:    stc %r14, 40(%r13)
+; CHECK-NEXT:    stg %r3, 24(%r13)
+; CHECK-NEXT:    stg %r2, 32(%r13)
+; CHECK-NEXT:    sth %r1, 6(%r13)
+; CHECK-NEXT:    stc %r0, 4(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 312(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: ret4:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -208
+; VECTOR-NEXT:    .cfi_def_cfa_offset 368
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lb %r0, 164(%r15)
+; VECTOR-NEXT:    lh %r1, 166(%r15)
+; VECTOR-NEXT:    lb %r4, 200(%r15)
+; VECTOR-NEXT:    lde %f0, 160(%r15)
+; VECTOR-NEXT:    l %r2, 168(%r15)
+; VECTOR-NEXT:    lg %r3, 176(%r15)
+; VECTOR-NEXT:    vl %v1, 184(%r15), 3
+; VECTOR-NEXT:    stc %r4, 40(%r13)
+; VECTOR-NEXT:    vst %v1, 24(%r13), 3
+; VECTOR-NEXT:    stg %r3, 16(%r13)
+; VECTOR-NEXT:    st %r2, 8(%r13)
+; VECTOR-NEXT:    sth %r1, 6(%r13)
+; VECTOR-NEXT:    stc %r0, 4(%r13)
+; VECTOR-NEXT:    ste %f0, 0(%r13)
+; VECTOR-NEXT:    lmg %r13, %r15, 312(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty4 @Fnptr()
+  ret %Ty4 %C
+}
+
+%Ty5 = type [4 x i128]
+define void @arg5(%Ty5 %A) {
+; CHECK-LABEL: arg5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    lg %r0, 0(%r2)
+; CHECK-NEXT:    lg %r1, 8(%r2)
+; CHECK-NEXT:    lg %r2, 0(%r3)
+; CHECK-NEXT:    lg %r3, 8(%r3)
+; CHECK-NEXT:    lg %r14, 8(%r5)
+; CHECK-NEXT:    lgrl %r13, Dst at GOT
+; CHECK-NEXT:    lg %r5, 0(%r5)
+; CHECK-NEXT:    lg %r12, 8(%r4)
+; CHECK-NEXT:    lg %r4, 0(%r4)
+; CHECK-NEXT:    stg %r14, 56(%r13)
+; CHECK-NEXT:    stg %r5, 48(%r13)
+; CHECK-NEXT:    stg %r12, 40(%r13)
+; CHECK-NEXT:    stg %r4, 32(%r13)
+; CHECK-NEXT:    stg %r3, 24(%r13)
+; CHECK-NEXT:    stg %r2, 16(%r13)
+; CHECK-NEXT:    stg %r1, 8(%r13)
+; CHECK-NEXT:    stg %r0, 0(%r13)
+; CHECK-NEXT:    lmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg5:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    vl %v1, 0(%r3), 3
+; VECTOR-NEXT:    vl %v2, 0(%r4), 3
+; VECTOR-NEXT:    vl %v3, 0(%r5), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vst %v3, 48(%r1), 3
+; VECTOR-NEXT:    vst %v2, 32(%r1), 3
+; VECTOR-NEXT:    vst %v1, 16(%r1), 3
+; VECTOR-NEXT:    vst %v0, 0(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty5 %A, ptr @Dst
+  ret void
+}
+
+define void @call5() {
+; CHECK-LABEL: call5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -224
+; CHECK-NEXT:    .cfi_def_cfa_offset 384
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r0, 0(%r1)
+; CHECK-NEXT:    lg %r2, 8(%r1)
+; CHECK-NEXT:    lg %r3, 16(%r1)
+; CHECK-NEXT:    lg %r4, 24(%r1)
+; CHECK-NEXT:    lg %r5, 56(%r1)
+; CHECK-NEXT:    lg %r14, 48(%r1)
+; CHECK-NEXT:    lg %r13, 40(%r1)
+; CHECK-NEXT:    lg %r1, 32(%r1)
+; CHECK-NEXT:    stg %r5, 168(%r15)
+; CHECK-NEXT:    stg %r14, 160(%r15)
+; CHECK-NEXT:    stg %r13, 184(%r15)
+; CHECK-NEXT:    stg %r1, 176(%r15)
+; CHECK-NEXT:    stg %r4, 200(%r15)
+; CHECK-NEXT:    stg %r3, 192(%r15)
+; CHECK-NEXT:    stg %r2, 216(%r15)
+; CHECK-NEXT:    la %r2, 208(%r15)
+; CHECK-NEXT:    la %r3, 192(%r15)
+; CHECK-NEXT:    la %r4, 176(%r15)
+; CHECK-NEXT:    la %r5, 160(%r15)
+; CHECK-NEXT:    stg %r0, 208(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call5:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -224
+; VECTOR-NEXT:    .cfi_def_cfa_offset 384
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    vl %v0, 0(%r1), 3
+; VECTOR-NEXT:    vl %v1, 16(%r1), 3
+; VECTOR-NEXT:    vl %v2, 32(%r1), 3
+; VECTOR-NEXT:    vl %v3, 48(%r1), 3
+; VECTOR-NEXT:    la %r2, 208(%r15)
+; VECTOR-NEXT:    la %r3, 192(%r15)
+; VECTOR-NEXT:    la %r4, 176(%r15)
+; VECTOR-NEXT:    la %r5, 160(%r15)
+; VECTOR-NEXT:    vst %v3, 160(%r15), 3
+; VECTOR-NEXT:    vst %v2, 176(%r15), 3
+; VECTOR-NEXT:    vst %v1, 192(%r15), 3
+; VECTOR-NEXT:    vst %v0, 208(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty5, ptr @Src
+  call void @Fnptr(%Ty5 %L)
+  ret void
+}
+
+; ret5: calls @Fnptr and returns the resulting %Ty5. The expected code hands
+; the callee a stack buffer (160(%r15)) in %r2, then copies the 64 bytes it
+; holds to the address that arrived in %r2 (kept in %r13 across the call).
+define %Ty5 @ret5() {
+; CHECK-LABEL: ret5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -224
+; CHECK-NEXT:    .cfi_def_cfa_offset 384
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
+; CHECK-NEXT:    lg %r0, 168(%r15)
+; CHECK-NEXT:    lg %r1, 160(%r15)
+; CHECK-NEXT:    lg %r2, 184(%r15)
+; CHECK-NEXT:    lg %r3, 176(%r15)
+; CHECK-NEXT:    lg %r4, 208(%r15)
+; CHECK-NEXT:    lg %r5, 216(%r15)
+; CHECK-NEXT:    lg %r14, 192(%r15)
+; CHECK-NEXT:    lg %r12, 200(%r15)
+; CHECK-NEXT:    stg %r4, 48(%r13)
+; CHECK-NEXT:    stg %r5, 56(%r13)
+; CHECK-NEXT:    stg %r14, 32(%r13)
+; CHECK-NEXT:    stg %r12, 40(%r13)
+; CHECK-NEXT:    stg %r3, 16(%r13)
+; CHECK-NEXT:    stg %r2, 24(%r13)
+; CHECK-NEXT:    stg %r1, 0(%r13)
+; CHECK-NEXT:    stg %r0, 8(%r13)
+; CHECK-NEXT:    lmg %r12, %r15, 320(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: ret5:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -224
+; VECTOR-NEXT:    .cfi_def_cfa_offset 384
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
+; VECTOR-NEXT:    vl %v0, 160(%r15), 3
+; VECTOR-NEXT:    vl %v1, 176(%r15), 3
+; VECTOR-NEXT:    vl %v2, 192(%r15), 3
+; VECTOR-NEXT:    vl %v3, 208(%r15), 3
+; VECTOR-NEXT:    vst %v3, 48(%r13), 3
+; VECTOR-NEXT:    vst %v2, 32(%r13), 3
+; VECTOR-NEXT:    vst %v1, 16(%r13), 3
+; VECTOR-NEXT:    vst %v0, 0(%r13), 3
+; VECTOR-NEXT:    lmg %r13, %r15, 328(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty5 @Fnptr()
+  ret %Ty5 %C
+}
+
+; %Ty6 is an array of two i72 elements. arg6 stores its %Ty6 argument to @Dst.
+; In the expected code the two elements are read through the pointers in %r2
+; and %r3, i.e. each i72 element arrives indirectly rather than in registers.
+%Ty6 = type [2 x i72]
+define void @arg6(%Ty6 %A) {
+; CHECK-LABEL: arg6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r3)
+; CHECK-NEXT:    lgrl %r1, Dst@GOT
+; CHECK-NEXT:    lg %r4, 8(%r2)
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stc %r0, 24(%r1)
+; CHECK-NEXT:    stc %r4, 8(%r1)
+; CHECK-NEXT:    sllg %r3, %r3, 56
+; CHECK-NEXT:    rosbg %r3, %r0, 8, 63, 56
+; CHECK-NEXT:    stg %r3, 16(%r1)
+; CHECK-NEXT:    sllg %r0, %r2, 56
+; CHECK-NEXT:    rosbg %r0, %r4, 8, 63, 56
+; CHECK-NEXT:    stg %r0, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg6:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    vl %v1, 0(%r3), 3
+; VECTOR-NEXT:    lgrl %r1, Dst@GOT
+; VECTOR-NEXT:    vsteb %v1, 24(%r1), 15
+; VECTOR-NEXT:    vrepib %v2, 8
+; VECTOR-NEXT:    vsteb %v0, 8(%r1), 15
+; VECTOR-NEXT:    vsrlb %v1, %v1, %v2
+; VECTOR-NEXT:    vsrlb %v0, %v0, %v2
+; VECTOR-NEXT:    vsteg %v1, 16(%r1), 1
+; VECTOR-NEXT:    vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT:    br %r14
+  store %Ty6 %A, ptr @Dst
+  ret void
+}
+
+; call6: loads a %Ty6 from @Src and passes it to @Fnptr. The expected code
+; writes each element into its own 16-byte stack slot (176(%r15) and
+; 160(%r15)) and passes the two slot addresses in %r2 and %r3.
+define void @call6() {
+; CHECK-LABEL: call6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgrl %r1, Src@GOT
+; CHECK-NEXT:    lg %r0, 0(%r1)
+; CHECK-NEXT:    lg %r2, 16(%r1)
+; CHECK-NEXT:    sllg %r3, %r0, 8
+; CHECK-NEXT:    sllg %r4, %r2, 8
+; CHECK-NEXT:    ic %r4, 24(%r1)
+; CHECK-NEXT:    ic %r3, 8(%r1)
+; CHECK-NEXT:    srlg %r0, %r0, 56
+; CHECK-NEXT:    srlg %r1, %r2, 56
+; CHECK-NEXT:    stg %r4, 168(%r15)
+; CHECK-NEXT:    stg %r1, 160(%r15)
+; CHECK-NEXT:    stg %r3, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r0, 176(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call6:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgrl %r1, Src@GOT
+; VECTOR-NEXT:    vgbm %v1, 0
+; VECTOR-NEXT:    vleb %v1, 8(%r1), 15
+; VECTOR-NEXT:    vlrepg %v2, 0(%r1)
+; VECTOR-NEXT:    vrepib %v3, 8
+; VECTOR-NEXT:    vslb %v2, %v2, %v3
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vo %v1, %v1, %v2
+; VECTOR-NEXT:    vleb %v0, 24(%r1), 15
+; VECTOR-NEXT:    vlrepg %v2, 16(%r1)
+; VECTOR-NEXT:    vslb %v2, %v2, %v3
+; VECTOR-NEXT:    vo %v0, %v0, %v2
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v0, 160(%r15), 3
+; VECTOR-NEXT:    vst %v1, 176(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty6, ptr @Src
+  call void @Fnptr(%Ty6 %L)
+  ret void
+}
+
+; ret6: calls @Fnptr and returns the resulting %Ty6. The expected code passes
+; a stack buffer (160(%r15)) to the callee in %r2, then copies each element as
+; an 8-byte plus a 1-byte store to offsets 0 and 16 of the address that arrived
+; in %r2 (kept in %r13 across the call).
+define %Ty6 @ret6() {
+; CHECK-LABEL: ret6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    llgc %r1, 184(%r15)
+; CHECK-NEXT:    lg %r2, 160(%r15)
+; CHECK-NEXT:    llgc %r3, 168(%r15)
+; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    stc %r1, 24(%r13)
+; CHECK-NEXT:    stg %r2, 0(%r13)
+; CHECK-NEXT:    stc %r3, 8(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: ret6:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vgbm %v1, 0
+; VECTOR-NEXT:    vleb %v1, 168(%r15), 15
+; VECTOR-NEXT:    vleb %v0, 184(%r15), 15
+; VECTOR-NEXT:    vlrepg %v2, 160(%r15)
+; VECTOR-NEXT:    vlrepg %v3, 176(%r15)
+; VECTOR-NEXT:    vsteg %v3, 16(%r13), 1
+; VECTOR-NEXT:    vsteb %v0, 24(%r13), 15
+; VECTOR-NEXT:    vsteg %v2, 0(%r13), 1
+; VECTOR-NEXT:    vsteb %v1, 8(%r13), 15
+; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty6 @Fnptr()
+  ret %Ty6 %C
+}
+
+; %Ty7 is a struct wrapping a single i128. arg7 takes two %Ty7 arguments and
+; stores %A at @Dst and %B in the %Ty7 slot that follows it (getelementptr
+; index 1). In the expected code both values are read through the pointers in
+; %r2 and %r3.
+%Ty7 = type {i128}
+define void @arg7(%Ty7 %A, %Ty7 %B) {
+; CHECK-LABEL: arg7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r2)
+; CHECK-NEXT:    lgrl %r1, Dst@GOT
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    lg %r4, 8(%r3)
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    stg %r0, 8(%r1)
+; CHECK-NEXT:    stg %r2, 0(%r1)
+; CHECK-NEXT:    stg %r4, 24(%r1)
+; CHECK-NEXT:    stg %r3, 16(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg7:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r3), 3
+; VECTOR-NEXT:    vl %v1, 0(%r2), 3
+; VECTOR-NEXT:    lgrl %r1, Dst@GOT
+; VECTOR-NEXT:    vst %v1, 0(%r1), 3
+; VECTOR-NEXT:    vst %v0, 16(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty7 %A, ptr @Dst
+  %D2 = getelementptr %Ty7, ptr @Dst, i32 1
+  store %Ty7 %B, ptr %D2
+  ret void
+}
+
+; call7: loads two consecutive %Ty7 values from @Src (the second one via
+; getelementptr index 1) and passes both to @Fnptr. The expected code stores
+; each value in its own 16-byte stack slot (176(%r15) and 160(%r15)) and passes
+; the slot addresses in %r2 and %r3.
+define void @call7() {
+; CHECK-LABEL: call7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgrl %r1, Src@GOT
+; CHECK-NEXT:    lg %r0, 24(%r1)
+; CHECK-NEXT:    lg %r2, 16(%r1)
+; CHECK-NEXT:    lg %r3, 8(%r1)
+; CHECK-NEXT:    lg %r1, 0(%r1)
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    stg %r2, 160(%r15)
+; CHECK-NEXT:    stg %r3, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r1, 176(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call7:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgrl %r1, Src@GOT
+; VECTOR-NEXT:    vl %v0, 0(%r1), 3
+; VECTOR-NEXT:    vl %v1, 16(%r1), 3
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v1, 160(%r15), 3
+; VECTOR-NEXT:    vst %v0, 176(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty7, ptr @Src
+  %S2 = getelementptr %Ty7, ptr @Src, i32 1
+  %L2 = load %Ty7, ptr %S2
+  call void @Fnptr(%Ty7 %L, %Ty7 %L2)
+  ret void
+}


        


More information about the llvm-commits mailing list