[llvm] [SystemZ] Handle IR struct arguments correctly. (PR #169583)

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 3 17:35:47 PST 2025


https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/169583

>From d78732afcba4174a23f419e1a6c5022ca6f6d4fc Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Tue, 25 Nov 2025 22:36:51 +0100
Subject: [PATCH 1/5] Handle IR struct args correctly.

---
 .../Target/SystemZ/SystemZISelLowering.cpp    |  78 +++--
 llvm/test/CodeGen/SystemZ/args-22.ll          | 278 ++++++++++++++++++
 2 files changed, 331 insertions(+), 25 deletions(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/args-22.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index eb93024bed35c..ab22c0efa8454 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1970,6 +1970,28 @@ SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
   return SDValue();
 }
 
+// Return true if Args[I] begins a split argument (Args parallels ArgLocs), and
+// if so set PartVT to the type of each part and NumParts to their number.
+template <class ArgTy>
+static bool analyzeArgSplit(const SmallVectorImpl<ArgTy> &Args,
+                            SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
+                            MVT &PartVT, unsigned &NumParts) {
+  if (!Args[I].Flags.isSplit())
+    return false;
+  assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
+         "ArgLocs havoc.");
+  PartVT = ArgLocs[I].getValVT();
+  NumParts = 1;
+  for (unsigned PartIdx = I + 1;; ++PartIdx) {
+    assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
+    assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
+    ++NumParts;
+    if (Args[PartIdx].Flags.isSplitEnd())
+      break;
+  }
+  return true;
+}
+
 SDValue SystemZTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
@@ -2074,16 +2096,26 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
                                    MachinePointerInfo()));
       // If the original argument was split (e.g. i128), we need
       // to load all parts of it here (using the same address).
-      unsigned ArgIndex = Ins[I].OrigArgIndex;
-      assert (Ins[I].PartOffset == 0);
-      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
-        CCValAssign &PartVA = ArgLocs[I + 1];
-        unsigned PartOffset = Ins[I + 1].PartOffset;
-        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
-                                      DAG.getIntPtrConstant(PartOffset, DL));
-        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
-                                     MachinePointerInfo()));
-        ++I;
+      MVT PartVT;
+      unsigned NumParts;
+      if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
+        // TODO: It is strange that while LowerCallTo() sets the PartOffset
+        // relative to the first split part, LowerArguments() sets the offset
+        // from the beginning of the struct. So with {i32, i256}, the
+        // PartOffsets of the i256 parts are handled differently. Try to
+        // remove that difference and use PartOffset directly here (instead
+        // of SplitBaseOffs).
+        unsigned SplitBaseOffs = Ins[I].PartOffset;
+        for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
+          ++I;
+          CCValAssign &PartVA = ArgLocs[I];
+          unsigned PartOffset = Ins[I].PartOffset - SplitBaseOffs;
+          SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+                                        DAG.getIntPtrConstant(PartOffset, DL));
+          InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
+                                       MachinePointerInfo()));
+          assert(PartOffset && "Offset should be non-zero.");
+        }
       }
     } else
       InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
@@ -2319,18 +2351,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // Store the argument in a stack slot and pass its address.
-      unsigned ArgIndex = Outs[I].OrigArgIndex;
       EVT SlotVT;
-      if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
-        // Allocate the full stack space for a promoted (and split) argument.
-        Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
-        EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
-        MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
-        unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
-        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
-      } else {
+      MVT PartVT;
+      unsigned NumParts = 1;
+      if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
+        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
+      else
         SlotVT = Outs[I].VT;
-      }
       SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
       MemOpChains.push_back(
@@ -2338,18 +2365,19 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                        MachinePointerInfo::getFixedStack(MF, FI)));
       // If the original argument was split (e.g. i128), we need
       // to store all parts of it here (and pass just one address).
-      assert (Outs[I].PartOffset == 0);
-      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
-        SDValue PartValue = OutVals[I + 1];
-        unsigned PartOffset = Outs[I + 1].PartOffset;
+      assert(Outs[I].PartOffset == 0);
+      for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
+        ++I;
+        SDValue PartValue = OutVals[I];
+        unsigned PartOffset = Outs[I].PartOffset;
         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                       DAG.getIntPtrConstant(PartOffset, DL));
         MemOpChains.push_back(
             DAG.getStore(Chain, DL, PartValue, Address,
                          MachinePointerInfo::getFixedStack(MF, FI)));
+        assert(PartOffset && "Offset should be non-zero.");
         assert((PartOffset + PartValue.getValueType().getStoreSize() <=
                 SlotVT.getStoreSize()) && "Not enough space for argument part!");
-        ++I;
       }
       ArgValue = SpillSlot;
     } else
diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
new file mode 100644
index 0000000000000..b920bb4847061
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -0,0 +1,278 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
+;
+; Test passing IR struct arguments, which do not adhere to the ABI but are
+; split up with each element passed like a separate argument.
+
+%Ty0 = type {i128}
+define fastcc void @fun0(%Ty0 %A) {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lg %r0, 8(%r2)
+; CHECK-NEXT:    lg %r1, 0(%r2)
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    stg %r1, 160(%r15)
+; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun0:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -176
+; VECTOR-NEXT:    .cfi_def_cfa_offset 336
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    vst %v0, 160(%r15), 3
+; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
+; VECTOR-NEXT:    br %r14
+  call void undef(%Ty0 %A)
+  ret void
+}
+
+%Ty1 = type {i128, i128}
+define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
+; CHECK-LABEL: fun1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -224
+; CHECK-NEXT:    .cfi_def_cfa_offset 384
+; CHECK-NEXT:    lg %r0, 0(%r2)
+; CHECK-NEXT:    lg %r1, 8(%r2)
+; CHECK-NEXT:    lg %r2, 0(%r3)
+; CHECK-NEXT:    lg %r3, 8(%r3)
+; CHECK-NEXT:    lg %r14, 8(%r5)
+; CHECK-NEXT:    lg %r5, 0(%r5)
+; CHECK-NEXT:    lg %r13, 8(%r4)
+; CHECK-NEXT:    lg %r4, 0(%r4)
+; CHECK-NEXT:    stg %r14, 168(%r15)
+; CHECK-NEXT:    stg %r5, 160(%r15)
+; CHECK-NEXT:    stg %r13, 184(%r15)
+; CHECK-NEXT:    stg %r4, 176(%r15)
+; CHECK-NEXT:    stg %r3, 200(%r15)
+; CHECK-NEXT:    stg %r2, 192(%r15)
+; CHECK-NEXT:    stg %r1, 216(%r15)
+; CHECK-NEXT:    la %r2, 208(%r15)
+; CHECK-NEXT:    la %r3, 192(%r15)
+; CHECK-NEXT:    la %r4, 176(%r15)
+; CHECK-NEXT:    la %r5, 160(%r15)
+; CHECK-NEXT:    stg %r0, 208(%r15)
+; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun1:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -224
+; VECTOR-NEXT:    .cfi_def_cfa_offset 384
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    vl %v1, 0(%r3), 3
+; VECTOR-NEXT:    vl %v2, 0(%r4), 3
+; VECTOR-NEXT:    vl %v3, 0(%r5), 3
+; VECTOR-NEXT:    la %r2, 208(%r15)
+; VECTOR-NEXT:    la %r3, 192(%r15)
+; VECTOR-NEXT:    la %r4, 176(%r15)
+; VECTOR-NEXT:    la %r5, 160(%r15)
+; VECTOR-NEXT:    vst %v3, 160(%r15), 3
+; VECTOR-NEXT:    vst %v2, 176(%r15), 3
+; VECTOR-NEXT:    vst %v1, 192(%r15), 3
+; VECTOR-NEXT:    vst %v0, 208(%r15), 3
+; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT:    br %r14
+  call void undef(%Ty1 %A, %Ty1 %B)
+  ret void
+}
+
+%Ty2 = type {i256}
+define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -224
+; CHECK-NEXT:    .cfi_def_cfa_offset 384
+; CHECK-NEXT:    lg %r0, 0(%r2)
+; CHECK-NEXT:    lg %r1, 8(%r2)
+; CHECK-NEXT:    lg %r4, 16(%r2)
+; CHECK-NEXT:    lg %r2, 24(%r2)
+; CHECK-NEXT:    lg %r5, 24(%r3)
+; CHECK-NEXT:    lg %r14, 16(%r3)
+; CHECK-NEXT:    lg %r13, 8(%r3)
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    stg %r5, 184(%r15)
+; CHECK-NEXT:    stg %r14, 176(%r15)
+; CHECK-NEXT:    stg %r13, 168(%r15)
+; CHECK-NEXT:    stg %r3, 160(%r15)
+; CHECK-NEXT:    stg %r2, 216(%r15)
+; CHECK-NEXT:    stg %r4, 208(%r15)
+; CHECK-NEXT:    stg %r1, 200(%r15)
+; CHECK-NEXT:    la %r2, 192(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r0, 192(%r15)
+; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun2:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -224
+; VECTOR-NEXT:    .cfi_def_cfa_offset 384
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    vl %v1, 16(%r2), 3
+; VECTOR-NEXT:    vl %v2, 0(%r3), 3
+; VECTOR-NEXT:    vl %v3, 16(%r3), 3
+; VECTOR-NEXT:    la %r2, 192(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v3, 176(%r15), 3
+; VECTOR-NEXT:    vst %v2, 160(%r15), 3
+; VECTOR-NEXT:    vst %v1, 208(%r15), 3
+; VECTOR-NEXT:    vst %v0, 192(%r15), 3
+; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT:    br %r14
+  call void undef(%Ty2 %A, %Ty2 %B)
+  ret void
+}
+
+%Ty3 = type {float, i256, i32, i128, i8}
+define fastcc void @fun3(%Ty3 %A) {
+; CHECK-LABEL: fun3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -208
+; CHECK-NEXT:    .cfi_def_cfa_offset 368
+; CHECK-NEXT:    lg %r0, 0(%r2)
+; CHECK-NEXT:    lg %r1, 8(%r2)
+; CHECK-NEXT:    lg %r14, 16(%r2)
+; CHECK-NEXT:    lg %r2, 24(%r2)
+; CHECK-NEXT:    lg %r13, 0(%r4)
+; CHECK-NEXT:    lg %r4, 8(%r4)
+; CHECK-NEXT:    stc %r5, 64
+; CHECK-NEXT:    st %r3, 40
+; CHECK-NEXT:    ste %f0, 0
+; CHECK-NEXT:    stg %r4, 56
+; CHECK-NEXT:    stg %r13, 48
+; CHECK-NEXT:    stg %r2, 32
+; CHECK-NEXT:    stg %r14, 24
+; CHECK-NEXT:    stg %r1, 16
+; CHECK-NEXT:    stg %r0, 8
+; CHECK-NEXT:    stg %r4, 168(%r15)
+; CHECK-NEXT:    stg %r13, 160(%r15)
+; CHECK-NEXT:    stg %r2, 200(%r15)
+; CHECK-NEXT:    stg %r14, 192(%r15)
+; CHECK-NEXT:    stg %r1, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r4, 160(%r15)
+; CHECK-NEXT:    stg %r0, 176(%r15)
+; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    lmg %r13, %r15, 312(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun3:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -208
+; VECTOR-NEXT:    .cfi_def_cfa_offset 368
+; VECTOR-NEXT:    vl %v1, 0(%r4), 3
+; VECTOR-NEXT:    vl %v2, 0(%r2), 3
+; VECTOR-NEXT:    vl %v3, 16(%r2), 3
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r4, 160(%r15)
+; VECTOR-NEXT:    stc %r5, 64
+; VECTOR-NEXT:    st %r3, 40
+; VECTOR-NEXT:    ste %f0, 0
+; VECTOR-NEXT:    vst %v3, 24, 3
+; VECTOR-NEXT:    vst %v2, 8, 3
+; VECTOR-NEXT:    vst %v1, 48, 3
+; VECTOR-NEXT:    vst %v1, 160(%r15), 3
+; VECTOR-NEXT:    vst %v3, 192(%r15), 3
+; VECTOR-NEXT:    vst %v2, 176(%r15), 3
+; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    lmg %r14, %r15, 320(%r15)
+; VECTOR-NEXT:    br %r14
+  store %Ty3 %A, ptr null
+  call void undef(%Ty3 %A)
+  ret void
+}
+
+%Ty4 = type {i72, i128}
+define fastcc void @fun4(%Ty4 %A) {
+; CHECK-LABEL: fun4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lg %r0, 8(%r3)
+; CHECK-NEXT:    lg %r1, 0(%r3)
+; CHECK-NEXT:    lg %r3, 8(%r2)
+; CHECK-NEXT:    lg %r4, 0(%r2)
+; CHECK-NEXT:    stg %r0, 24
+; CHECK-NEXT:    stg %r1, 16
+; CHECK-NEXT:    stc %r3, 8
+; CHECK-NEXT:    sllg %r2, %r4, 56
+; CHECK-NEXT:    rosbg %r2, %r3, 8, 63, 56
+; CHECK-NEXT:    stg %r2, 0
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    stg %r1, 160(%r15)
+; CHECK-NEXT:    stg %r3, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r4, 176(%r15)
+; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun4:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    vl %v1, 0(%r2), 3
+; VECTOR-NEXT:    vl %v0, 0(%r3), 3
+; VECTOR-NEXT:    vsteb %v1, 8, 15
+; VECTOR-NEXT:    vrepib %v2, 8
+; VECTOR-NEXT:    vsrlb %v2, %v1, %v2
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v0, 16, 3
+; VECTOR-NEXT:    vsteg %v2, 0, 1
+; VECTOR-NEXT:    vst %v0, 160(%r15), 3
+; VECTOR-NEXT:    vst %v1, 176(%r15), 3
+; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    br %r14
+  store %Ty4 %A, ptr null
+  call void undef(%Ty4 %A)
+  ret void
+}

>From 8d5fc621a64512a527e46973d3d4fda29a1636a2 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Wed, 26 Nov 2025 00:40:49 +0100
Subject: [PATCH 2/5] Add test case for returning.

---
 llvm/test/CodeGen/SystemZ/args-22.ll | 43 ++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
index b920bb4847061..39f7ff5dffe4b 100644
--- a/llvm/test/CodeGen/SystemZ/args-22.ll
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -276,3 +276,46 @@ define fastcc void @fun4(%Ty4 %A) {
   call void undef(%Ty4 %A)
   ret void
 }
+
+%Ty5 = type {i128, i128}
+declare fastcc %Ty5 @foo5()
+define fastcc void @fun5() {
+; CHECK-LABEL: fun5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo5@PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    lg %r1, 184(%r15)
+; CHECK-NEXT:    lg %r2, 160(%r15)
+; CHECK-NEXT:    lg %r3, 168(%r15)
+; CHECK-NEXT:    stg %r0, 16
+; CHECK-NEXT:    stg %r1, 24
+; CHECK-NEXT:    stg %r2, 0
+; CHECK-NEXT:    stg %r3, 8
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun5:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, foo5@PLT
+; VECTOR-NEXT:    vl %v0, 160(%r15), 3
+; VECTOR-NEXT:    vl %v1, 176(%r15), 3
+; VECTOR-NEXT:    vst %v1, 16, 3
+; VECTOR-NEXT:    vst %v0, 0, 3
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    br %r14
+  %A = call %Ty5 @foo5()
+  store %Ty5 %A, ptr null
+  ret void
+}

>From 120a9a7befc87828c61a853ba9648675b908b392 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Wed, 26 Nov 2025 01:23:33 +0100
Subject: [PATCH 3/5] Avoid undef calls in tests.

---
 llvm/test/CodeGen/SystemZ/args-22.ll | 32 +++++++++++++++-------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
index 39f7ff5dffe4b..021a7536dae3e 100644
--- a/llvm/test/CodeGen/SystemZ/args-22.ll
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -5,6 +5,8 @@
 ; Test passing IR struct arguments, which do not adhere to the ABI but are
 ; split up with each element passed like a separate argument.
 
+@fnptr = external global ptr
+
 %Ty0 = type {i128}
 define fastcc void @fun0(%Ty0 %A) {
 ; CHECK-LABEL: fun0:
@@ -19,7 +21,7 @@ define fastcc void @fun0(%Ty0 %A) {
 ; CHECK-NEXT:    stg %r0, 168(%r15)
 ; CHECK-NEXT:    la %r2, 160(%r15)
 ; CHECK-NEXT:    stg %r1, 160(%r15)
-; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    brasl %r14, fnptr@PLT
 ; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -33,10 +35,10 @@ define fastcc void @fun0(%Ty0 %A) {
 ; VECTOR-NEXT:    vl %v0, 0(%r2), 3
 ; VECTOR-NEXT:    la %r2, 160(%r15)
 ; VECTOR-NEXT:    vst %v0, 160(%r15), 3
-; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    brasl %r14, fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
 ; VECTOR-NEXT:    br %r14
-  call void undef(%Ty0 %A)
+  call void @fnptr(%Ty0 %A)
   ret void
 }
 
@@ -70,7 +72,7 @@ define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
 ; CHECK-NEXT:    la %r4, 176(%r15)
 ; CHECK-NEXT:    la %r5, 160(%r15)
 ; CHECK-NEXT:    stg %r0, 208(%r15)
-; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    brasl %r14, fnptr@PLT
 ; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -93,10 +95,10 @@ define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
 ; VECTOR-NEXT:    vst %v2, 176(%r15), 3
 ; VECTOR-NEXT:    vst %v1, 192(%r15), 3
 ; VECTOR-NEXT:    vst %v0, 208(%r15), 3
-; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    brasl %r14, fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
 ; VECTOR-NEXT:    br %r14
-  call void undef(%Ty1 %A, %Ty1 %B)
+  call void @fnptr(%Ty1 %A, %Ty1 %B)
   ret void
 }
 
@@ -128,7 +130,7 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
 ; CHECK-NEXT:    la %r2, 192(%r15)
 ; CHECK-NEXT:    la %r3, 160(%r15)
 ; CHECK-NEXT:    stg %r0, 192(%r15)
-; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    brasl %r14, fnptr@PLT
 ; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -149,10 +151,10 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
 ; VECTOR-NEXT:    vst %v2, 160(%r15), 3
 ; VECTOR-NEXT:    vst %v1, 208(%r15), 3
 ; VECTOR-NEXT:    vst %v0, 192(%r15), 3
-; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    brasl %r14, fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
 ; VECTOR-NEXT:    br %r14
-  call void undef(%Ty2 %A, %Ty2 %B)
+  call void @fnptr(%Ty2 %A, %Ty2 %B)
   ret void
 }
 
@@ -189,7 +191,7 @@ define fastcc void @fun3(%Ty3 %A) {
 ; CHECK-NEXT:    la %r2, 176(%r15)
 ; CHECK-NEXT:    la %r4, 160(%r15)
 ; CHECK-NEXT:    stg %r0, 176(%r15)
-; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    brasl %r14, fnptr@PLT
 ; CHECK-NEXT:    lmg %r13, %r15, 312(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -214,11 +216,11 @@ define fastcc void @fun3(%Ty3 %A) {
 ; VECTOR-NEXT:    vst %v1, 160(%r15), 3
 ; VECTOR-NEXT:    vst %v3, 192(%r15), 3
 ; VECTOR-NEXT:    vst %v2, 176(%r15), 3
-; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    brasl %r14, fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 320(%r15)
 ; VECTOR-NEXT:    br %r14
   store %Ty3 %A, ptr null
-  call void undef(%Ty3 %A)
+  call void @fnptr(%Ty3 %A)
   ret void
 }
 
@@ -247,7 +249,7 @@ define fastcc void @fun4(%Ty4 %A) {
 ; CHECK-NEXT:    la %r2, 176(%r15)
 ; CHECK-NEXT:    la %r3, 160(%r15)
 ; CHECK-NEXT:    stg %r4, 176(%r15)
-; CHECK-NEXT:    basr %r14, %r1
+; CHECK-NEXT:    brasl %r14, fnptr@PLT
 ; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -269,11 +271,11 @@ define fastcc void @fun4(%Ty4 %A) {
 ; VECTOR-NEXT:    vsteg %v2, 0, 1
 ; VECTOR-NEXT:    vst %v0, 160(%r15), 3
 ; VECTOR-NEXT:    vst %v1, 176(%r15), 3
-; VECTOR-NEXT:    basr %r14, %r1
+; VECTOR-NEXT:    brasl %r14, fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
 ; VECTOR-NEXT:    br %r14
   store %Ty4 %A, ptr null
-  call void undef(%Ty4 %A)
+  call void @fnptr(%Ty4 %A)
   ret void
 }
 

>From b08256f11c24d85be32f42f25c787720625b2b2f Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Wed, 26 Nov 2025 23:13:13 +0100
Subject: [PATCH 4/5] Testing improved.

---
 llvm/test/CodeGen/SystemZ/args-22.ll | 389 ++++++++++++++++++++++-----
 1 file changed, 318 insertions(+), 71 deletions(-)

diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
index 021a7536dae3e..da0f3e08c7a62 100644
--- a/llvm/test/CodeGen/SystemZ/args-22.ll
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -5,7 +5,9 @@
 ; Test passing IR struct arguments, which do not adhere to the ABI but are
 ; split up with each element passed like a separate argument.
 
-@fnptr = external global ptr
+@Fnptr = external global ptr
+@Src = external global ptr
+@Dst = external global ptr
 
 %Ty0 = type {i128}
 define fastcc void @fun0(%Ty0 %A) {
@@ -21,7 +23,7 @@ define fastcc void @fun0(%Ty0 %A) {
 ; CHECK-NEXT:    stg %r0, 168(%r15)
 ; CHECK-NEXT:    la %r2, 160(%r15)
 ; CHECK-NEXT:    stg %r1, 160(%r15)
-; CHECK-NEXT:    brasl %r14, fnptr@PLT
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
 ; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -35,10 +37,10 @@ define fastcc void @fun0(%Ty0 %A) {
 ; VECTOR-NEXT:    vl %v0, 0(%r2), 3
 ; VECTOR-NEXT:    la %r2, 160(%r15)
 ; VECTOR-NEXT:    vst %v0, 160(%r15), 3
-; VECTOR-NEXT:    brasl %r14, fnptr@PLT
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
 ; VECTOR-NEXT:    br %r14
-  call void @fnptr(%Ty0 %A)
+  call void @Fnptr(%Ty0 %A)
   ret void
 }
 
@@ -72,7 +74,7 @@ define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
 ; CHECK-NEXT:    la %r4, 176(%r15)
 ; CHECK-NEXT:    la %r5, 160(%r15)
 ; CHECK-NEXT:    stg %r0, 208(%r15)
-; CHECK-NEXT:    brasl %r14, fnptr@PLT
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
 ; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -95,10 +97,10 @@ define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
 ; VECTOR-NEXT:    vst %v2, 176(%r15), 3
 ; VECTOR-NEXT:    vst %v1, 192(%r15), 3
 ; VECTOR-NEXT:    vst %v0, 208(%r15), 3
-; VECTOR-NEXT:    brasl %r14, fnptr@PLT
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
 ; VECTOR-NEXT:    br %r14
-  call void @fnptr(%Ty1 %A, %Ty1 %B)
+  call void @Fnptr(%Ty1 %A, %Ty1 %B)
   ret void
 }
 
@@ -130,7 +132,7 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
 ; CHECK-NEXT:    la %r2, 192(%r15)
 ; CHECK-NEXT:    la %r3, 160(%r15)
 ; CHECK-NEXT:    stg %r0, 192(%r15)
-; CHECK-NEXT:    brasl %r14, fnptr@PLT
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
 ; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -151,10 +153,10 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
 ; VECTOR-NEXT:    vst %v2, 160(%r15), 3
 ; VECTOR-NEXT:    vst %v1, 208(%r15), 3
 ; VECTOR-NEXT:    vst %v0, 192(%r15), 3
-; VECTOR-NEXT:    brasl %r14, fnptr@PLT
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
 ; VECTOR-NEXT:    br %r14
-  call void @fnptr(%Ty2 %A, %Ty2 %B)
+  call void @Fnptr(%Ty2 %A, %Ty2 %B)
   ret void
 }
 
@@ -162,7 +164,8 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
 define fastcc void @fun3(%Ty3 %A) {
 ; CHECK-LABEL: fun3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
 ; CHECK-NEXT:    .cfi_offset %r13, -56
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
@@ -171,28 +174,29 @@ define fastcc void @fun3(%Ty3 %A) {
 ; CHECK-NEXT:    lg %r0, 0(%r2)
 ; CHECK-NEXT:    lg %r1, 8(%r2)
 ; CHECK-NEXT:    lg %r14, 16(%r2)
+; CHECK-NEXT:    lgrl %r13, Dst@GOT
 ; CHECK-NEXT:    lg %r2, 24(%r2)
-; CHECK-NEXT:    lg %r13, 0(%r4)
+; CHECK-NEXT:    lg %r12, 0(%r4)
 ; CHECK-NEXT:    lg %r4, 8(%r4)
-; CHECK-NEXT:    stc %r5, 64
-; CHECK-NEXT:    st %r3, 40
-; CHECK-NEXT:    ste %f0, 0
-; CHECK-NEXT:    stg %r4, 56
-; CHECK-NEXT:    stg %r13, 48
-; CHECK-NEXT:    stg %r2, 32
-; CHECK-NEXT:    stg %r14, 24
-; CHECK-NEXT:    stg %r1, 16
-; CHECK-NEXT:    stg %r0, 8
+; CHECK-NEXT:    stc %r5, 64(%r13)
+; CHECK-NEXT:    st %r3, 40(%r13)
+; CHECK-NEXT:    ste %f0, 0(%r13)
+; CHECK-NEXT:    stg %r4, 56(%r13)
+; CHECK-NEXT:    stg %r12, 48(%r13)
+; CHECK-NEXT:    stg %r2, 32(%r13)
+; CHECK-NEXT:    stg %r14, 24(%r13)
+; CHECK-NEXT:    stg %r1, 16(%r13)
+; CHECK-NEXT:    stg %r0, 8(%r13)
 ; CHECK-NEXT:    stg %r4, 168(%r15)
-; CHECK-NEXT:    stg %r13, 160(%r15)
+; CHECK-NEXT:    stg %r12, 160(%r15)
 ; CHECK-NEXT:    stg %r2, 200(%r15)
 ; CHECK-NEXT:    stg %r14, 192(%r15)
 ; CHECK-NEXT:    stg %r1, 184(%r15)
 ; CHECK-NEXT:    la %r2, 176(%r15)
 ; CHECK-NEXT:    la %r4, 160(%r15)
 ; CHECK-NEXT:    stg %r0, 176(%r15)
-; CHECK-NEXT:    brasl %r14, fnptr@PLT
-; CHECK-NEXT:    lmg %r13, %r15, 312(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
+; CHECK-NEXT:    lmg %r12, %r15, 304(%r15)
 ; CHECK-NEXT:    br %r14
 ;
 ; VECTOR-LABEL: fun3:
@@ -205,22 +209,23 @@ define fastcc void @fun3(%Ty3 %A) {
 ; VECTOR-NEXT:    vl %v1, 0(%r4), 3
 ; VECTOR-NEXT:    vl %v2, 0(%r2), 3
 ; VECTOR-NEXT:    vl %v3, 16(%r2), 3
+; VECTOR-NEXT:    lgrl %r1, Dst@GOT
 ; VECTOR-NEXT:    la %r2, 176(%r15)
 ; VECTOR-NEXT:    la %r4, 160(%r15)
-; VECTOR-NEXT:    stc %r5, 64
-; VECTOR-NEXT:    st %r3, 40
-; VECTOR-NEXT:    ste %f0, 0
-; VECTOR-NEXT:    vst %v3, 24, 3
-; VECTOR-NEXT:    vst %v2, 8, 3
-; VECTOR-NEXT:    vst %v1, 48, 3
+; VECTOR-NEXT:    stc %r5, 64(%r1)
+; VECTOR-NEXT:    st %r3, 40(%r1)
+; VECTOR-NEXT:    ste %f0, 0(%r1)
+; VECTOR-NEXT:    vst %v3, 24(%r1), 3
+; VECTOR-NEXT:    vst %v2, 8(%r1), 3
+; VECTOR-NEXT:    vst %v1, 48(%r1), 3
 ; VECTOR-NEXT:    vst %v1, 160(%r15), 3
 ; VECTOR-NEXT:    vst %v3, 192(%r15), 3
 ; VECTOR-NEXT:    vst %v2, 176(%r15), 3
-; VECTOR-NEXT:    brasl %r14, fnptr@PLT
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 320(%r15)
 ; VECTOR-NEXT:    br %r14
-  store %Ty3 %A, ptr null
-  call void @fnptr(%Ty3 %A)
+  store %Ty3 %A, ptr @Dst
+  call void @Fnptr(%Ty3 %A)
   ret void
 }
 
@@ -234,22 +239,23 @@ define fastcc void @fun4(%Ty4 %A) {
 ; CHECK-NEXT:    aghi %r15, -192
 ; CHECK-NEXT:    .cfi_def_cfa_offset 352
 ; CHECK-NEXT:    lg %r0, 8(%r3)
-; CHECK-NEXT:    lg %r1, 0(%r3)
-; CHECK-NEXT:    lg %r3, 8(%r2)
-; CHECK-NEXT:    lg %r4, 0(%r2)
-; CHECK-NEXT:    stg %r0, 24
-; CHECK-NEXT:    stg %r1, 16
-; CHECK-NEXT:    stc %r3, 8
-; CHECK-NEXT:    sllg %r2, %r4, 56
-; CHECK-NEXT:    rosbg %r2, %r3, 8, 63, 56
-; CHECK-NEXT:    stg %r2, 0
+; CHECK-NEXT:    lgrl %r1, Dst@GOT
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    lg %r4, 8(%r2)
+; CHECK-NEXT:    lg %r5, 0(%r2)
+; CHECK-NEXT:    stg %r0, 24(%r1)
+; CHECK-NEXT:    stg %r3, 16(%r1)
+; CHECK-NEXT:    stc %r4, 8(%r1)
+; CHECK-NEXT:    sllg %r2, %r5, 56
+; CHECK-NEXT:    rosbg %r2, %r4, 8, 63, 56
+; CHECK-NEXT:    stg %r2, 0(%r1)
 ; CHECK-NEXT:    stg %r0, 168(%r15)
-; CHECK-NEXT:    stg %r1, 160(%r15)
-; CHECK-NEXT:    stg %r3, 184(%r15)
+; CHECK-NEXT:    stg %r3, 160(%r15)
+; CHECK-NEXT:    stg %r4, 184(%r15)
 ; CHECK-NEXT:    la %r2, 176(%r15)
 ; CHECK-NEXT:    la %r3, 160(%r15)
-; CHECK-NEXT:    stg %r4, 176(%r15)
-; CHECK-NEXT:    brasl %r14, fnptr@PLT
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
 ; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
 ; CHECK-NEXT:    br %r14
 ;
@@ -262,62 +268,303 @@ define fastcc void @fun4(%Ty4 %A) {
 ; VECTOR-NEXT:    .cfi_def_cfa_offset 352
 ; VECTOR-NEXT:    vl %v1, 0(%r2), 3
 ; VECTOR-NEXT:    vl %v0, 0(%r3), 3
-; VECTOR-NEXT:    vsteb %v1, 8, 15
+; VECTOR-NEXT:    lgrl %r1, Dst@GOT
 ; VECTOR-NEXT:    vrepib %v2, 8
 ; VECTOR-NEXT:    vsrlb %v2, %v1, %v2
+; VECTOR-NEXT:    vsteb %v1, 8(%r1), 15
 ; VECTOR-NEXT:    la %r2, 176(%r15)
 ; VECTOR-NEXT:    la %r3, 160(%r15)
-; VECTOR-NEXT:    vst %v0, 16, 3
-; VECTOR-NEXT:    vsteg %v2, 0, 1
+; VECTOR-NEXT:    vst %v0, 16(%r1), 3
+; VECTOR-NEXT:    vsteg %v2, 0(%r1), 1
 ; VECTOR-NEXT:    vst %v0, 160(%r15), 3
 ; VECTOR-NEXT:    vst %v1, 176(%r15), 3
-; VECTOR-NEXT:    brasl %r14, fnptr@PLT
+; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
 ; VECTOR-NEXT:    br %r14
-  store %Ty4 %A, ptr null
-  call void @fnptr(%Ty4 %A)
+  store %Ty4 %A, ptr @Dst
+  call void @Fnptr(%Ty4 %A)
   ret void
 }
 
 %Ty5 = type {i128, i128}
 declare fastcc %Ty5 @foo5()
-define fastcc void @fun5() {
+define fastcc %Ty5 @fun5() {
 ; CHECK-LABEL: fun5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
 ; CHECK-NEXT:    aghi %r15, -192
 ; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgr %r13, %r2
 ; CHECK-NEXT:    la %r2, 160(%r15)
 ; CHECK-NEXT:    brasl %r14, foo5@PLT
-; CHECK-NEXT:    lg %r0, 176(%r15)
-; CHECK-NEXT:    lg %r1, 184(%r15)
+; CHECK-NEXT:    lg %r0, 168(%r15)
+; CHECK-NEXT:    lgrl %r1, Src@GOT
 ; CHECK-NEXT:    lg %r2, 160(%r15)
-; CHECK-NEXT:    lg %r3, 168(%r15)
-; CHECK-NEXT:    stg %r0, 16
-; CHECK-NEXT:    stg %r1, 24
-; CHECK-NEXT:    stg %r2, 0
-; CHECK-NEXT:    stg %r3, 8
-; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    alg %r0, 8(%r1)
+; CHECK-NEXT:    lg %r3, 176(%r15)
+; CHECK-NEXT:    lg %r4, 184(%r15)
+; CHECK-NEXT:    alcg %r2, 0(%r1)
+; CHECK-NEXT:    stg %r3, 16(%r13)
+; CHECK-NEXT:    stg %r4, 24(%r13)
+; CHECK-NEXT:    stg %r0, 8(%r13)
+; CHECK-NEXT:    stg %r2, 0(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
 ; CHECK-NEXT:    br %r14
 ;
 ; VECTOR-LABEL: fun5:
 ; VECTOR:       # %bb.0:
-; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
 ; VECTOR-NEXT:    .cfi_offset %r14, -48
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
 ; VECTOR-NEXT:    aghi %r15, -192
 ; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgr %r13, %r2
 ; VECTOR-NEXT:    la %r2, 160(%r15)
 ; VECTOR-NEXT:    brasl %r14, foo5@PLT
-; VECTOR-NEXT:    vl %v0, 160(%r15), 3
-; VECTOR-NEXT:    vl %v1, 176(%r15), 3
-; VECTOR-NEXT:    vst %v1, 16, 3
-; VECTOR-NEXT:    vst %v0, 0, 3
-; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    lgrl %r1, Src@GOT
+; VECTOR-NEXT:    vl %v1, 160(%r15), 3
+; VECTOR-NEXT:    vl %v2, 0(%r1), 3
+; VECTOR-NEXT:    vl %v0, 176(%r15), 3
+; VECTOR-NEXT:    vaq %v1, %v1, %v2
+; VECTOR-NEXT:    vst %v0, 16(%r13), 3
+; VECTOR-NEXT:    vst %v1, 0(%r13), 3
+; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
 ; VECTOR-NEXT:    br %r14
-  %A = call %Ty5 @foo5()
-  store %Ty5 %A, ptr null
-  ret void
+  %V = call %Ty5 @foo5()
+  %Val0 = extractvalue %Ty5 %V, 0
+  %Ld = load i128, ptr @Src
+  %Add = add i128 %Val0, %Ld
+  %Res = insertvalue %Ty5 %V, i128 %Add, 0
+  ret %Ty5 %Res
+}
+
+%Ty6 = type {float, i128, i16}
+declare fastcc %Ty6 @foo6()
+define fastcc %Ty6 @fun6() {
+; CHECK-LABEL: fun6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo6@PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    lgrl %r1, Src@GOT
+; CHECK-NEXT:    lg %r2, 168(%r15)
+; CHECK-NEXT:    alg %r0, 8(%r1)
+; CHECK-NEXT:    le %f0, 160(%r15)
+; CHECK-NEXT:    lh %r3, 184(%r15)
+; CHECK-NEXT:    alcg %r2, 0(%r1)
+; CHECK-NEXT:    ste %f0, 0(%r13)
+; CHECK-NEXT:    sth %r3, 24(%r13)
+; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    stg %r2, 8(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun6:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, foo6@PLT
+; VECTOR-NEXT:    lgrl %r1, Src@GOT
+; VECTOR-NEXT:    vl %v1, 168(%r15), 3
+; VECTOR-NEXT:    vl %v2, 0(%r1), 3
+; VECTOR-NEXT:    lh %r0, 184(%r15)
+; VECTOR-NEXT:    lde %f0, 160(%r15)
+; VECTOR-NEXT:    vaq %v1, %v1, %v2
+; VECTOR-NEXT:    sth %r0, 24(%r13)
+; VECTOR-NEXT:    vst %v1, 8(%r13), 3
+; VECTOR-NEXT:    ste %f0, 0(%r13)
+; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT:    br %r14
+  %V = call %Ty6 @foo6()
+  %Val1 = extractvalue %Ty6 %V, 1
+  %Ld = load i128, ptr @Src
+  %Add = add i128 %Val1, %Ld
+  %Res = insertvalue %Ty6 %V, i128 %Add, 1
+  ret %Ty6 %Res
+}
+
+%Ty7 = type [4 x i128]
+declare fastcc %Ty7 @foo7()
+define fastcc %Ty7 @fun7() {
+; CHECK-LABEL: fun7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -224
+; CHECK-NEXT:    .cfi_def_cfa_offset 384
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo7@PLT
+; CHECK-NEXT:    lg %r0, 200(%r15)
+; CHECK-NEXT:    lg %r1, 192(%r15)
+; CHECK-NEXT:    lg %r2, 176(%r15)
+; CHECK-NEXT:    lg %r3, 184(%r15)
+; CHECK-NEXT:    lg %r4, 168(%r15)
+; CHECK-NEXT:    lg %r5, 160(%r15)
+; CHECK-NEXT:    lg %r14, 208(%r15)
+; CHECK-NEXT:    lg %r12, 216(%r15)
+; CHECK-NEXT:    algr %r4, %r3
+; CHECK-NEXT:    alcgr %r5, %r2
+; CHECK-NEXT:    stg %r14, 48(%r13)
+; CHECK-NEXT:    stg %r12, 56(%r13)
+; CHECK-NEXT:    stg %r1, 32(%r13)
+; CHECK-NEXT:    stg %r0, 40(%r13)
+; CHECK-NEXT:    stg %r2, 16(%r13)
+; CHECK-NEXT:    stg %r3, 24(%r13)
+; CHECK-NEXT:    stg %r4, 8(%r13)
+; CHECK-NEXT:    stg %r5, 0(%r13)
+; CHECK-NEXT:    lmg %r12, %r15, 320(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun7:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -224
+; VECTOR-NEXT:    .cfi_def_cfa_offset 384
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, foo7@PLT
+; VECTOR-NEXT:    vl %v2, 176(%r15), 3
+; VECTOR-NEXT:    vl %v3, 160(%r15), 3
+; VECTOR-NEXT:    vl %v0, 192(%r15), 3
+; VECTOR-NEXT:    vl %v1, 208(%r15), 3
+; VECTOR-NEXT:    vaq %v3, %v3, %v2
+; VECTOR-NEXT:    vst %v1, 48(%r13), 3
+; VECTOR-NEXT:    vst %v0, 32(%r13), 3
+; VECTOR-NEXT:    vst %v2, 16(%r13), 3
+; VECTOR-NEXT:    vst %v3, 0(%r13), 3
+; VECTOR-NEXT:    lmg %r13, %r15, 328(%r15)
+; VECTOR-NEXT:    br %r14
+  %V = call %Ty7 @foo7()
+  %Val0 = extractvalue %Ty7 %V, 0
+  %Val1 = extractvalue %Ty7 %V, 1
+  %Add = add i128 %Val0, %Val1
+  %Res = insertvalue %Ty7 %V, i128 %Add, 0
+  ret %Ty7 %Res
+}
+
+%Ty8 = type {float, [2 x i128], i32}
+declare fastcc %Ty8 @foo8()
+define fastcc %Ty8 @fun8() {
+; CHECK-LABEL: fun8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -208
+; CHECK-NEXT:    .cfi_def_cfa_offset 368
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo8@PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    lg %r1, 168(%r15)
+; CHECK-NEXT:    le %f0, 160(%r15)
+; CHECK-NEXT:    lhi %r2, 1
+; CHECK-NEXT:    a %r2, 200(%r15)
+; CHECK-NEXT:    lg %r3, 184(%r15)
+; CHECK-NEXT:    lg %r4, 192(%r15)
+; CHECK-NEXT:    ste %f0, 0(%r13)
+; CHECK-NEXT:    st %r2, 40(%r13)
+; CHECK-NEXT:    stg %r3, 24(%r13)
+; CHECK-NEXT:    stg %r4, 32(%r13)
+; CHECK-NEXT:    stg %r1, 8(%r13)
+; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 312(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun8:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -208
+; VECTOR-NEXT:    .cfi_def_cfa_offset 368
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, foo8@PLT
+; VECTOR-NEXT:    lhi %r0, 1
+; VECTOR-NEXT:    a %r0, 200(%r15)
+; VECTOR-NEXT:    lde %f0, 160(%r15)
+; VECTOR-NEXT:    vl %v1, 168(%r15), 3
+; VECTOR-NEXT:    vl %v2, 184(%r15), 3
+; VECTOR-NEXT:    st %r0, 40(%r13)
+; VECTOR-NEXT:    vst %v2, 24(%r13), 3
+; VECTOR-NEXT:    vst %v1, 8(%r13), 3
+; VECTOR-NEXT:    ste %f0, 0(%r13)
+; VECTOR-NEXT:    lmg %r13, %r15, 312(%r15)
+; VECTOR-NEXT:    br %r14
+  %V = call %Ty8 @foo8()
+  %Val2 = extractvalue %Ty8 %V, 2
+  %Add = add i32 %Val2, 1
+  %Res = insertvalue %Ty8 %V, i32 %Add, 2
+  ret %Ty8 %Res
+}
+
+%Ty9 = type {i72}
+declare fastcc %Ty9 @foo9(%Ty9)
+define fastcc %Ty9 @fun9(%Ty9 %A) {
+; CHECK-LABEL: fun9:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lg %r0, 8(%r2)
+; CHECK-NEXT:    lg %r1, 0(%r2)
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    stg %r1, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo9@PLT
+; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: fun9:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    vl %v0, 0(%r3), 3
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    la %r3, 176(%r15)
+; VECTOR-NEXT:    vst %v0, 176(%r15), 3
+; VECTOR-NEXT:    brasl %r14, foo9@PLT
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 168(%r15), 15
+; VECTOR-NEXT:    vlrepg %v1, 160(%r15)
+; VECTOR-NEXT:    vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT:    vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT:    br %r14
+  %Res = call %Ty9 @foo9(%Ty9 %A)
+  ret %Ty9 %Res
 }

>From 25131e075160e0cc0ecde40442c07431da003c37 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Thu, 4 Dec 2025 02:33:30 +0100
Subject: [PATCH 5/5] Fix CanLowerReturn() to return false for any scalar
 integer >64 bits. Tests reworked.

---
 .../Target/SystemZ/SystemZISelLowering.cpp    |    2 +-
 llvm/test/CodeGen/SystemZ/args-22.ll          | 1122 ++++++++++++-----
 2 files changed, 779 insertions(+), 345 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index ab22c0efa8454..2511d08a6d0ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2562,7 +2562,7 @@ bool SystemZTargetLowering::CanLowerReturn(
   // Special case that we cannot easily detect in RetCC_SystemZ since
   // i128 may not be a legal type.
   for (auto &Out : Outs)
-    if (Out.ArgVT == MVT::i128)
+    if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
       return false;
 
   SmallVector<CCValAssign, 16> RetLocs;
diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
index da0f3e08c7a62..ba422b65fc299 100644
--- a/llvm/test/CodeGen/SystemZ/args-22.ll
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
 ;
-; Test passing IR struct arguments, which do not adhere to the ABI but are
+; Test passing IR struct arguments, which do not adhere to the SystemZ ABI but are
 ; split up with each element passed like a separate argument.
 
 @Fnptr = external global ptr
@@ -10,16 +10,37 @@
 @Dst = external global ptr
 
 %Ty0 = type {i128}
-define fastcc void @fun0(%Ty0 %A) {
-; CHECK-LABEL: fun0:
+define void @arg0(%Ty0 %A) {
+; CHECK-LABEL: arg0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r2)
+; CHECK-NEXT:    lgrl %r1, Dst@GOT
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stg %r0, 8(%r1)
+; CHECK-NEXT:    stg %r2, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg0:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    lgrl %r1, Dst@GOT
+; VECTOR-NEXT:    vst %v0, 0(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty0 %A, ptr @Dst
+  ret void
+}
+
+define void @call0() {
+; CHECK-LABEL: call0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
 ; CHECK-NEXT:    aghi %r15, -176
 ; CHECK-NEXT:    .cfi_def_cfa_offset 336
-; CHECK-NEXT:    lg %r0, 8(%r2)
-; CHECK-NEXT:    lg %r1, 0(%r2)
+; CHECK-NEXT:    lgrl %r1, Src@GOT
+; CHECK-NEXT:    lg %r0, 8(%r1)
+; CHECK-NEXT:    lg %r1, 0(%r1)
 ; CHECK-NEXT:    stg %r0, 168(%r15)
 ; CHECK-NEXT:    la %r2, 160(%r15)
 ; CHECK-NEXT:    stg %r1, 160(%r15)
@@ -27,269 +48,248 @@ define fastcc void @fun0(%Ty0 %A) {
 ; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun0:
+; VECTOR-LABEL: call0:
 ; VECTOR:       # %bb.0:
 ; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
 ; VECTOR-NEXT:    .cfi_offset %r14, -48
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
 ; VECTOR-NEXT:    aghi %r15, -176
 ; VECTOR-NEXT:    .cfi_def_cfa_offset 336
-; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    lgrl %r1, Src@GOT
+; VECTOR-NEXT:    vl %v0, 0(%r1), 3
 ; VECTOR-NEXT:    la %r2, 160(%r15)
 ; VECTOR-NEXT:    vst %v0, 160(%r15), 3
 ; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
 ; VECTOR-NEXT:    br %r14
-  call void @Fnptr(%Ty0 %A)
+  %L = load %Ty0, ptr @Src
+  call void @Fnptr(%Ty0 %L)
   ret void
 }
 
-%Ty1 = type {i128, i128}
-define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
-; CHECK-LABEL: fun1:
+define %Ty0 @ret0() {
+; CHECK-LABEL: ret0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
 ; CHECK-NEXT:    .cfi_offset %r13, -56
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    aghi %r15, -224
-; CHECK-NEXT:    .cfi_def_cfa_offset 384
-; CHECK-NEXT:    lg %r0, 0(%r2)
-; CHECK-NEXT:    lg %r1, 8(%r2)
-; CHECK-NEXT:    lg %r2, 0(%r3)
-; CHECK-NEXT:    lg %r3, 8(%r3)
-; CHECK-NEXT:    lg %r14, 8(%r5)
-; CHECK-NEXT:    lg %r5, 0(%r5)
-; CHECK-NEXT:    lg %r13, 8(%r4)
-; CHECK-NEXT:    lg %r4, 0(%r4)
-; CHECK-NEXT:    stg %r14, 168(%r15)
-; CHECK-NEXT:    stg %r5, 160(%r15)
-; CHECK-NEXT:    stg %r13, 184(%r15)
-; CHECK-NEXT:    stg %r4, 176(%r15)
-; CHECK-NEXT:    stg %r3, 200(%r15)
-; CHECK-NEXT:    stg %r2, 192(%r15)
-; CHECK-NEXT:    stg %r1, 216(%r15)
-; CHECK-NEXT:    la %r2, 208(%r15)
-; CHECK-NEXT:    la %r3, 192(%r15)
-; CHECK-NEXT:    la %r4, 176(%r15)
-; CHECK-NEXT:    la %r5, 160(%r15)
-; CHECK-NEXT:    stg %r0, 208(%r15)
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
 ; CHECK-NEXT:    brasl %r14, Fnptr@PLT
-; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT:    lg %r0, 168(%r15)
+; CHECK-NEXT:    lg %r1, 160(%r15)
+; CHECK-NEXT:    stg %r0, 8(%r13)
+; CHECK-NEXT:    stg %r1, 0(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 280(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun1:
+; VECTOR-LABEL: ret0:
 ; VECTOR:       # %bb.0:
-; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
 ; VECTOR-NEXT:    .cfi_offset %r14, -48
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
-; VECTOR-NEXT:    aghi %r15, -224
-; VECTOR-NEXT:    .cfi_def_cfa_offset 384
-; VECTOR-NEXT:    vl %v0, 0(%r2), 3
-; VECTOR-NEXT:    vl %v1, 0(%r3), 3
-; VECTOR-NEXT:    vl %v2, 0(%r4), 3
-; VECTOR-NEXT:    vl %v3, 0(%r5), 3
-; VECTOR-NEXT:    la %r2, 208(%r15)
-; VECTOR-NEXT:    la %r3, 192(%r15)
-; VECTOR-NEXT:    la %r4, 176(%r15)
-; VECTOR-NEXT:    la %r5, 160(%r15)
-; VECTOR-NEXT:    vst %v3, 160(%r15), 3
-; VECTOR-NEXT:    vst %v2, 176(%r15), 3
-; VECTOR-NEXT:    vst %v1, 192(%r15), 3
-; VECTOR-NEXT:    vst %v0, 208(%r15), 3
+; VECTOR-NEXT:    aghi %r15, -176
+; VECTOR-NEXT:    .cfi_def_cfa_offset 336
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
 ; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
-; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT:    vl %v0, 160(%r15), 3
+; VECTOR-NEXT:    vst %v0, 0(%r13), 3
+; VECTOR-NEXT:    lmg %r13, %r15, 280(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty0 @Fnptr()
+  ret %Ty0 %C
+}
+
+%Ty1 = type {i72}
+define void @arg1(%Ty1 %A) {
+; CHECK-LABEL: arg1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r2)
+; CHECK-NEXT:    lgrl %r1, Dst@GOT
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stc %r0, 8(%r1)
+; CHECK-NEXT:    sllg %r2, %r2, 56
+; CHECK-NEXT:    rosbg %r2, %r0, 8, 63, 56
+; CHECK-NEXT:    stg %r2, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg1:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    lgrl %r1, Dst@GOT
+; VECTOR-NEXT:    vrepib %v1, 8
+; VECTOR-NEXT:    vsteb %v0, 8(%r1), 15
+; VECTOR-NEXT:    vsrlb %v0, %v0, %v1
+; VECTOR-NEXT:    vsteg %v0, 0(%r1), 1
 ; VECTOR-NEXT:    br %r14
-  call void @Fnptr(%Ty1 %A, %Ty1 %B)
+  store %Ty1 %A, ptr @Dst
   ret void
 }
 
-%Ty2 = type {i256}
-define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
-; CHECK-LABEL: fun2:
+define void @call1() {
+; CHECK-LABEL: call1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
-; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    aghi %r15, -224
-; CHECK-NEXT:    .cfi_def_cfa_offset 384
-; CHECK-NEXT:    lg %r0, 0(%r2)
-; CHECK-NEXT:    lg %r1, 8(%r2)
-; CHECK-NEXT:    lg %r4, 16(%r2)
-; CHECK-NEXT:    lg %r2, 24(%r2)
-; CHECK-NEXT:    lg %r5, 24(%r3)
-; CHECK-NEXT:    lg %r14, 16(%r3)
-; CHECK-NEXT:    lg %r13, 8(%r3)
-; CHECK-NEXT:    lg %r3, 0(%r3)
-; CHECK-NEXT:    stg %r5, 184(%r15)
-; CHECK-NEXT:    stg %r14, 176(%r15)
-; CHECK-NEXT:    stg %r13, 168(%r15)
-; CHECK-NEXT:    stg %r3, 160(%r15)
-; CHECK-NEXT:    stg %r2, 216(%r15)
-; CHECK-NEXT:    stg %r4, 208(%r15)
-; CHECK-NEXT:    stg %r1, 200(%r15)
-; CHECK-NEXT:    la %r2, 192(%r15)
-; CHECK-NEXT:    la %r3, 160(%r15)
-; CHECK-NEXT:    stg %r0, 192(%r15)
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lgrl %r1, Src@GOT
+; CHECK-NEXT:    lg %r0, 0(%r1)
+; CHECK-NEXT:    sllg %r2, %r0, 8
+; CHECK-NEXT:    ic %r2, 8(%r1)
+; CHECK-NEXT:    srlg %r0, %r0, 56
+; CHECK-NEXT:    stg %r2, 168(%r15)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    stg %r0, 160(%r15)
 ; CHECK-NEXT:    brasl %r14, Fnptr@PLT
-; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun2:
+; VECTOR-LABEL: call1:
 ; VECTOR:       # %bb.0:
 ; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
 ; VECTOR-NEXT:    .cfi_offset %r14, -48
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
-; VECTOR-NEXT:    aghi %r15, -224
-; VECTOR-NEXT:    .cfi_def_cfa_offset 384
-; VECTOR-NEXT:    vl %v0, 0(%r2), 3
-; VECTOR-NEXT:    vl %v1, 16(%r2), 3
-; VECTOR-NEXT:    vl %v2, 0(%r3), 3
-; VECTOR-NEXT:    vl %v3, 16(%r3), 3
-; VECTOR-NEXT:    la %r2, 192(%r15)
-; VECTOR-NEXT:    la %r3, 160(%r15)
-; VECTOR-NEXT:    vst %v3, 176(%r15), 3
-; VECTOR-NEXT:    vst %v2, 160(%r15), 3
-; VECTOR-NEXT:    vst %v1, 208(%r15), 3
-; VECTOR-NEXT:    vst %v0, 192(%r15), 3
+; VECTOR-NEXT:    aghi %r15, -176
+; VECTOR-NEXT:    .cfi_def_cfa_offset 336
+; VECTOR-NEXT:    lgrl %r1, Src@GOT
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 8(%r1), 15
+; VECTOR-NEXT:    vlrepg %v1, 0(%r1)
+; VECTOR-NEXT:    vrepib %v2, 8
+; VECTOR-NEXT:    vslb %v1, %v1, %v2
+; VECTOR-NEXT:    vo %v0, %v0, %v1
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    vst %v0, 160(%r15), 3
 ; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
-; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
 ; VECTOR-NEXT:    br %r14
-  call void @Fnptr(%Ty2 %A, %Ty2 %B)
+  %L = load %Ty1, ptr @Src
+  call void @Fnptr(%Ty1 %L)
   ret void
 }
 
-%Ty3 = type {float, i256, i32, i128, i8}
-define fastcc void @fun3(%Ty3 %A) {
-; CHECK-LABEL: fun3:
+define %Ty1 @ret1() {
+; CHECK-LABEL: ret1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
-; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
 ; CHECK-NEXT:    .cfi_offset %r13, -56
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    aghi %r15, -208
-; CHECK-NEXT:    .cfi_def_cfa_offset 368
-; CHECK-NEXT:    lg %r0, 0(%r2)
-; CHECK-NEXT:    lg %r1, 8(%r2)
-; CHECK-NEXT:    lg %r14, 16(%r2)
-; CHECK-NEXT:    lgrl %r13, Dst@GOT
-; CHECK-NEXT:    lg %r2, 24(%r2)
-; CHECK-NEXT:    lg %r12, 0(%r4)
-; CHECK-NEXT:    lg %r4, 8(%r4)
-; CHECK-NEXT:    stc %r5, 64(%r13)
-; CHECK-NEXT:    st %r3, 40(%r13)
-; CHECK-NEXT:    ste %f0, 0(%r13)
-; CHECK-NEXT:    stg %r4, 56(%r13)
-; CHECK-NEXT:    stg %r12, 48(%r13)
-; CHECK-NEXT:    stg %r2, 32(%r13)
-; CHECK-NEXT:    stg %r14, 24(%r13)
-; CHECK-NEXT:    stg %r1, 16(%r13)
-; CHECK-NEXT:    stg %r0, 8(%r13)
-; CHECK-NEXT:    stg %r4, 168(%r15)
-; CHECK-NEXT:    stg %r12, 160(%r15)
-; CHECK-NEXT:    stg %r2, 200(%r15)
-; CHECK-NEXT:    stg %r14, 192(%r15)
-; CHECK-NEXT:    stg %r1, 184(%r15)
-; CHECK-NEXT:    la %r2, 176(%r15)
-; CHECK-NEXT:    la %r4, 160(%r15)
-; CHECK-NEXT:    stg %r0, 176(%r15)
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    .cfi_def_cfa_offset 336
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
 ; CHECK-NEXT:    brasl %r14, Fnptr@PLT
-; CHECK-NEXT:    lmg %r12, %r15, 304(%r15)
+; CHECK-NEXT:    lg %r0, 160(%r15)
+; CHECK-NEXT:    llgc %r1, 168(%r15)
+; CHECK-NEXT:    stg %r0, 0(%r13)
+; CHECK-NEXT:    stc %r1, 8(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 280(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun3:
+; VECTOR-LABEL: ret1:
 ; VECTOR:       # %bb.0:
-; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
 ; VECTOR-NEXT:    .cfi_offset %r14, -48
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
-; VECTOR-NEXT:    aghi %r15, -208
-; VECTOR-NEXT:    .cfi_def_cfa_offset 368
-; VECTOR-NEXT:    vl %v1, 0(%r4), 3
-; VECTOR-NEXT:    vl %v2, 0(%r2), 3
-; VECTOR-NEXT:    vl %v3, 16(%r2), 3
-; VECTOR-NEXT:    lgrl %r1, Dst@GOT
-; VECTOR-NEXT:    la %r2, 176(%r15)
-; VECTOR-NEXT:    la %r4, 160(%r15)
-; VECTOR-NEXT:    stc %r5, 64(%r1)
-; VECTOR-NEXT:    st %r3, 40(%r1)
-; VECTOR-NEXT:    ste %f0, 0(%r1)
-; VECTOR-NEXT:    vst %v3, 24(%r1), 3
-; VECTOR-NEXT:    vst %v2, 8(%r1), 3
-; VECTOR-NEXT:    vst %v1, 48(%r1), 3
-; VECTOR-NEXT:    vst %v1, 160(%r15), 3
-; VECTOR-NEXT:    vst %v3, 192(%r15), 3
-; VECTOR-NEXT:    vst %v2, 176(%r15), 3
+; VECTOR-NEXT:    aghi %r15, -176
+; VECTOR-NEXT:    .cfi_def_cfa_offset 336
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
 ; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
-; VECTOR-NEXT:    lmg %r14, %r15, 320(%r15)
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 168(%r15), 15
+; VECTOR-NEXT:    vlrepg %v1, 160(%r15)
+; VECTOR-NEXT:    vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT:    vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT:    lmg %r13, %r15, 280(%r15)
 ; VECTOR-NEXT:    br %r14
-  store %Ty3 %A, ptr @Dst
-  call void @Fnptr(%Ty3 %A)
-  ret void
+  %C = call %Ty1 @Fnptr()
+  ret %Ty1 %C
 }
 
-%Ty4 = type {i72, i128}
-define fastcc void @fun4(%Ty4 %A) {
-; CHECK-LABEL: fun4:
+%Ty2 = type {i128, i128}
+define void @arg2(%Ty2 %A) {
+; CHECK-LABEL: arg2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
-; CHECK-NEXT:    .cfi_offset %r14, -48
-; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    aghi %r15, -192
-; CHECK-NEXT:    .cfi_def_cfa_offset 352
 ; CHECK-NEXT:    lg %r0, 8(%r3)
 ; CHECK-NEXT:    lgrl %r1, Dst@GOT
 ; CHECK-NEXT:    lg %r3, 0(%r3)
 ; CHECK-NEXT:    lg %r4, 8(%r2)
-; CHECK-NEXT:    lg %r5, 0(%r2)
+; CHECK-NEXT:    lg %r2, 0(%r2)
 ; CHECK-NEXT:    stg %r0, 24(%r1)
 ; CHECK-NEXT:    stg %r3, 16(%r1)
-; CHECK-NEXT:    stc %r4, 8(%r1)
-; CHECK-NEXT:    sllg %r2, %r5, 56
-; CHECK-NEXT:    rosbg %r2, %r4, 8, 63, 56
+; CHECK-NEXT:    stg %r4, 8(%r1)
 ; CHECK-NEXT:    stg %r2, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg2:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    vl %v1, 0(%r3), 3
+; VECTOR-NEXT:    lgrl %r1, Dst@GOT
+; VECTOR-NEXT:    vst %v1, 16(%r1), 3
+; VECTOR-NEXT:    vst %v0, 0(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty2 %A, ptr @Dst
+  ret void
+}
+
+define void @call2() {
+; CHECK-LABEL: call2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgrl %r1, Src@GOT
+; CHECK-NEXT:    lg %r0, 24(%r1)
+; CHECK-NEXT:    lg %r2, 16(%r1)
+; CHECK-NEXT:    lg %r3, 8(%r1)
+; CHECK-NEXT:    lg %r1, 0(%r1)
 ; CHECK-NEXT:    stg %r0, 168(%r15)
-; CHECK-NEXT:    stg %r3, 160(%r15)
-; CHECK-NEXT:    stg %r4, 184(%r15)
+; CHECK-NEXT:    stg %r2, 160(%r15)
+; CHECK-NEXT:    stg %r3, 184(%r15)
 ; CHECK-NEXT:    la %r2, 176(%r15)
 ; CHECK-NEXT:    la %r3, 160(%r15)
-; CHECK-NEXT:    stg %r5, 176(%r15)
+; CHECK-NEXT:    stg %r1, 176(%r15)
 ; CHECK-NEXT:    brasl %r14, Fnptr@PLT
 ; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun4:
+; VECTOR-LABEL: call2:
 ; VECTOR:       # %bb.0:
 ; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
 ; VECTOR-NEXT:    .cfi_offset %r14, -48
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
 ; VECTOR-NEXT:    aghi %r15, -192
 ; VECTOR-NEXT:    .cfi_def_cfa_offset 352
-; VECTOR-NEXT:    vl %v1, 0(%r2), 3
-; VECTOR-NEXT:    vl %v0, 0(%r3), 3
-; VECTOR-NEXT:    lgrl %r1, Dst@GOT
-; VECTOR-NEXT:    vrepib %v2, 8
-; VECTOR-NEXT:    vsrlb %v2, %v1, %v2
-; VECTOR-NEXT:    vsteb %v1, 8(%r1), 15
+; VECTOR-NEXT:    lgrl %r1, Src@GOT
+; VECTOR-NEXT:    vl %v0, 0(%r1), 3
+; VECTOR-NEXT:    vl %v1, 16(%r1), 3
 ; VECTOR-NEXT:    la %r2, 176(%r15)
 ; VECTOR-NEXT:    la %r3, 160(%r15)
-; VECTOR-NEXT:    vst %v0, 16(%r1), 3
-; VECTOR-NEXT:    vsteg %v2, 0(%r1), 1
-; VECTOR-NEXT:    vst %v0, 160(%r15), 3
-; VECTOR-NEXT:    vst %v1, 176(%r15), 3
+; VECTOR-NEXT:    vst %v1, 160(%r15), 3
+; VECTOR-NEXT:    vst %v0, 176(%r15), 3
 ; VECTOR-NEXT:    brasl %r14, Fnptr@PLT
 ; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
 ; VECTOR-NEXT:    br %r14
-  store %Ty4 %A, ptr @Dst
-  call void @Fnptr(%Ty4 %A)
+  %L = load %Ty2, ptr @Src
+  call void @Fnptr(%Ty2 %L)
   ret void
 }
 
-%Ty5 = type {i128, i128}
-declare fastcc %Ty5 @foo5()
-define fastcc %Ty5 @fun5() {
-; CHECK-LABEL: fun5:
+define %Ty2 @ret2() {
+; CHECK-LABEL: ret2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
 ; CHECK-NEXT:    .cfi_offset %r13, -56
@@ -299,22 +299,19 @@ define fastcc %Ty5 @fun5() {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 352
 ; CHECK-NEXT:    lgr %r13, %r2
 ; CHECK-NEXT:    la %r2, 160(%r15)
-; CHECK-NEXT:    brasl %r14, foo5@PLT
-; CHECK-NEXT:    lg %r0, 168(%r15)
-; CHECK-NEXT:    lgrl %r1, Src@GOT
+; CHECK-NEXT:    brasl %r14, Fnptr@PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    lg %r1, 184(%r15)
 ; CHECK-NEXT:    lg %r2, 160(%r15)
-; CHECK-NEXT:    alg %r0, 8(%r1)
-; CHECK-NEXT:    lg %r3, 176(%r15)
-; CHECK-NEXT:    lg %r4, 184(%r15)
-; CHECK-NEXT:    alcg %r2, 0(%r1)
-; CHECK-NEXT:    stg %r3, 16(%r13)
-; CHECK-NEXT:    stg %r4, 24(%r13)
-; CHECK-NEXT:    stg %r0, 8(%r13)
+; CHECK-NEXT:    lg %r3, 168(%r15)
+; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    stg %r1, 24(%r13)
 ; CHECK-NEXT:    stg %r2, 0(%r13)
+; CHECK-NEXT:    stg %r3, 8(%r13)
 ; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun5:
+; VECTOR-LABEL: ret2:
 ; VECTOR:       # %bb.0:
 ; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
 ; VECTOR-NEXT:    .cfi_offset %r13, -56
@@ -324,152 +321,244 @@ define fastcc %Ty5 @fun5() {
 ; VECTOR-NEXT:    .cfi_def_cfa_offset 352
 ; VECTOR-NEXT:    lgr %r13, %r2
 ; VECTOR-NEXT:    la %r2, 160(%r15)
-; VECTOR-NEXT:    brasl %r14, foo5 at PLT
-; VECTOR-NEXT:    lgrl %r1, Src at GOT
-; VECTOR-NEXT:    vl %v1, 160(%r15), 3
-; VECTOR-NEXT:    vl %v2, 0(%r1), 3
-; VECTOR-NEXT:    vl %v0, 176(%r15), 3
-; VECTOR-NEXT:    vaq %v1, %v1, %v2
-; VECTOR-NEXT:    vst %v0, 16(%r13), 3
-; VECTOR-NEXT:    vst %v1, 0(%r13), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    vl %v0, 160(%r15), 3
+; VECTOR-NEXT:    vl %v1, 176(%r15), 3
+; VECTOR-NEXT:    vst %v1, 16(%r13), 3
+; VECTOR-NEXT:    vst %v0, 0(%r13), 3
 ; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
 ; VECTOR-NEXT:    br %r14
-  %V = call %Ty5 @foo5()
-  %Val0 = extractvalue %Ty5 %V, 0
-  %Ld = load i128, ptr @Src
-  %Add = add i128 %Val0, %Ld
-  %Res = insertvalue %Ty5 %V, i128 %Add, 0
-  ret %Ty5 %Res
+  %C = call %Ty2 @Fnptr()
+  ret %Ty2 %C
 }
 
-%Ty6 = type {float, i128, i16}
-declare fastcc %Ty6 @foo6()
-define fastcc %Ty6 @fun6() {
-; CHECK-LABEL: fun6:
+%Ty3 = type {i72, i128}
+define void @arg3(%Ty3 %A) {
+; CHECK-LABEL: arg3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
-; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    lg %r0, 8(%r3)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    lg %r4, 8(%r2)
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stg %r0, 24(%r1)
+; CHECK-NEXT:    stg %r3, 16(%r1)
+; CHECK-NEXT:    stc %r4, 8(%r1)
+; CHECK-NEXT:    sllg %r0, %r2, 56
+; CHECK-NEXT:    rosbg %r0, %r4, 8, 63, 56
+; CHECK-NEXT:    stg %r0, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg3:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r3), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vl %v1, 0(%r2), 3
+; VECTOR-NEXT:    vsteb %v1, 8(%r1), 15
+; VECTOR-NEXT:    vst %v0, 16(%r1), 3
+; VECTOR-NEXT:    vrepib %v0, 8
+; VECTOR-NEXT:    vsrlb %v0, %v1, %v0
+; VECTOR-NEXT:    vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT:    br %r14
+  store %Ty3 %A, ptr @Dst
+  ret void
+}
+
+define void @call3() {
+; CHECK-LABEL: call3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
 ; CHECK-NEXT:    aghi %r15, -192
 ; CHECK-NEXT:    .cfi_def_cfa_offset 352
-; CHECK-NEXT:    lgr %r13, %r2
-; CHECK-NEXT:    la %r2, 160(%r15)
-; CHECK-NEXT:    brasl %r14, foo6 at PLT
-; CHECK-NEXT:    lg %r0, 176(%r15)
 ; CHECK-NEXT:    lgrl %r1, Src at GOT
-; CHECK-NEXT:    lg %r2, 168(%r15)
-; CHECK-NEXT:    alg %r0, 8(%r1)
-; CHECK-NEXT:    le %f0, 160(%r15)
-; CHECK-NEXT:    lh %r3, 184(%r15)
-; CHECK-NEXT:    alcg %r2, 0(%r1)
-; CHECK-NEXT:    ste %f0, 0(%r13)
-; CHECK-NEXT:    sth %r3, 24(%r13)
-; CHECK-NEXT:    stg %r0, 16(%r13)
-; CHECK-NEXT:    stg %r2, 8(%r13)
-; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT:    lg %r0, 0(%r1)
+; CHECK-NEXT:    sllg %r2, %r0, 8
+; CHECK-NEXT:    lg %r3, 24(%r1)
+; CHECK-NEXT:    lg %r4, 16(%r1)
+; CHECK-NEXT:    ic %r2, 8(%r1)
+; CHECK-NEXT:    srlg %r0, %r0, 56
+; CHECK-NEXT:    stg %r3, 168(%r15)
+; CHECK-NEXT:    stg %r4, 160(%r15)
+; CHECK-NEXT:    stg %r2, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r0, 176(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun6:
+; VECTOR-LABEL: call3:
 ; VECTOR:       # %bb.0:
-; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
-; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
 ; VECTOR-NEXT:    .cfi_offset %r14, -48
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
 ; VECTOR-NEXT:    aghi %r15, -192
 ; VECTOR-NEXT:    .cfi_def_cfa_offset 352
-; VECTOR-NEXT:    lgr %r13, %r2
-; VECTOR-NEXT:    la %r2, 160(%r15)
-; VECTOR-NEXT:    brasl %r14, foo6 at PLT
 ; VECTOR-NEXT:    lgrl %r1, Src at GOT
-; VECTOR-NEXT:    vl %v1, 168(%r15), 3
-; VECTOR-NEXT:    vl %v2, 0(%r1), 3
-; VECTOR-NEXT:    lh %r0, 184(%r15)
-; VECTOR-NEXT:    lde %f0, 160(%r15)
-; VECTOR-NEXT:    vaq %v1, %v1, %v2
-; VECTOR-NEXT:    sth %r0, 24(%r13)
-; VECTOR-NEXT:    vst %v1, 8(%r13), 3
-; VECTOR-NEXT:    ste %f0, 0(%r13)
-; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 8(%r1), 15
+; VECTOR-NEXT:    vlrepg %v1, 0(%r1)
+; VECTOR-NEXT:    vrepib %v2, 8
+; VECTOR-NEXT:    vslb %v1, %v1, %v2
+; VECTOR-NEXT:    vo %v0, %v0, %v1
+; VECTOR-NEXT:    vl %v1, 16(%r1), 3
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v1, 160(%r15), 3
+; VECTOR-NEXT:    vst %v0, 176(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
 ; VECTOR-NEXT:    br %r14
-  %V = call %Ty6 @foo6()
-  %Val1 = extractvalue %Ty6 %V, 1
-  %Ld = load i128, ptr @Src
-  %Add = add i128 %Val1, %Ld
-  %Res = insertvalue %Ty6 %V, i128 %Add, 1
-  ret %Ty6 %Res
+  %L = load %Ty3, ptr @Src
+  call void @Fnptr(%Ty3 %L)
+  ret void
 }
 
-%Ty7 = type [4 x i128]
-declare fastcc %Ty7 @foo7()
-define fastcc %Ty7 @fun7() {
-; CHECK-LABEL: fun7:
+define %Ty3 @ret3() {
+; CHECK-LABEL: ret3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
-; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
 ; CHECK-NEXT:    .cfi_offset %r13, -56
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    aghi %r15, -224
-; CHECK-NEXT:    .cfi_def_cfa_offset 384
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
 ; CHECK-NEXT:    lgr %r13, %r2
 ; CHECK-NEXT:    la %r2, 160(%r15)
-; CHECK-NEXT:    brasl %r14, foo7 at PLT
-; CHECK-NEXT:    lg %r0, 200(%r15)
-; CHECK-NEXT:    lg %r1, 192(%r15)
-; CHECK-NEXT:    lg %r2, 176(%r15)
-; CHECK-NEXT:    lg %r3, 184(%r15)
-; CHECK-NEXT:    lg %r4, 168(%r15)
-; CHECK-NEXT:    lg %r5, 160(%r15)
-; CHECK-NEXT:    lg %r14, 208(%r15)
-; CHECK-NEXT:    lg %r12, 216(%r15)
-; CHECK-NEXT:    algr %r4, %r3
-; CHECK-NEXT:    alcgr %r5, %r2
-; CHECK-NEXT:    stg %r14, 48(%r13)
-; CHECK-NEXT:    stg %r12, 56(%r13)
-; CHECK-NEXT:    stg %r1, 32(%r13)
-; CHECK-NEXT:    stg %r0, 40(%r13)
-; CHECK-NEXT:    stg %r2, 16(%r13)
-; CHECK-NEXT:    stg %r3, 24(%r13)
-; CHECK-NEXT:    stg %r4, 8(%r13)
-; CHECK-NEXT:    stg %r5, 0(%r13)
-; CHECK-NEXT:    lmg %r12, %r15, 320(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    lg %r1, 184(%r15)
+; CHECK-NEXT:    lg %r2, 160(%r15)
+; CHECK-NEXT:    llgc %r3, 168(%r15)
+; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    stg %r1, 24(%r13)
+; CHECK-NEXT:    stg %r2, 0(%r13)
+; CHECK-NEXT:    stc %r3, 8(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun7:
+; VECTOR-LABEL: ret3:
 ; VECTOR:       # %bb.0:
 ; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
 ; VECTOR-NEXT:    .cfi_offset %r13, -56
 ; VECTOR-NEXT:    .cfi_offset %r14, -48
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
-; VECTOR-NEXT:    aghi %r15, -224
-; VECTOR-NEXT:    .cfi_def_cfa_offset 384
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
 ; VECTOR-NEXT:    lgr %r13, %r2
 ; VECTOR-NEXT:    la %r2, 160(%r15)
-; VECTOR-NEXT:    brasl %r14, foo7 at PLT
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vleb %v0, 168(%r15), 15
+; VECTOR-NEXT:    vlrepg %v1, 160(%r15)
 ; VECTOR-NEXT:    vl %v2, 176(%r15), 3
-; VECTOR-NEXT:    vl %v3, 160(%r15), 3
-; VECTOR-NEXT:    vl %v0, 192(%r15), 3
-; VECTOR-NEXT:    vl %v1, 208(%r15), 3
-; VECTOR-NEXT:    vaq %v3, %v3, %v2
-; VECTOR-NEXT:    vst %v1, 48(%r13), 3
-; VECTOR-NEXT:    vst %v0, 32(%r13), 3
 ; VECTOR-NEXT:    vst %v2, 16(%r13), 3
-; VECTOR-NEXT:    vst %v3, 0(%r13), 3
-; VECTOR-NEXT:    lmg %r13, %r15, 328(%r15)
+; VECTOR-NEXT:    vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT:    vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty3 @Fnptr()
+  ret %Ty3 %C
+}
+
+%Ty4 = type {float, i8, i16, i32, i64, i128, i8}
+define void @arg4(%Ty4 %A) {
+; CHECK-LABEL: arg4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    l %r0, 164(%r15)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r14, 0(%r6)
+; CHECK-NEXT:    lg %r13, 8(%r6)
+; CHECK-NEXT:    stc %r0, 40(%r1)
+; CHECK-NEXT:    stg %r5, 16(%r1)
+; CHECK-NEXT:    st %r4, 8(%r1)
+; CHECK-NEXT:    sth %r3, 6(%r1)
+; CHECK-NEXT:    stc %r2, 4(%r1)
+; CHECK-NEXT:    ste %f0, 0(%r1)
+; CHECK-NEXT:    stg %r13, 32(%r1)
+; CHECK-NEXT:    stg %r14, 24(%r1)
+; CHECK-NEXT:    lmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg4:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v1, 0(%r6), 3
+; VECTOR-NEXT:    l %r0, 164(%r15)
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    stc %r0, 40(%r1)
+; VECTOR-NEXT:    stg %r5, 16(%r1)
+; VECTOR-NEXT:    st %r4, 8(%r1)
+; VECTOR-NEXT:    sth %r3, 6(%r1)
+; VECTOR-NEXT:    stc %r2, 4(%r1)
+; VECTOR-NEXT:    ste %f0, 0(%r1)
+; VECTOR-NEXT:    vst %v1, 24(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty4 %A, ptr @Dst
+  ret void
+}
+
+define void @call4() {
+; CHECK-LABEL: call4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
+; CHECK-NEXT:    .cfi_offset %r6, -112
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -184
+; CHECK-NEXT:    .cfi_def_cfa_offset 344
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r5, 16(%r1)
+; CHECK-NEXT:    l %r4, 8(%r1)
+; CHECK-NEXT:    le %f0, 0(%r1)
+; CHECK-NEXT:    lg %r0, 24(%r1)
+; CHECK-NEXT:    lb %r14, 40(%r1)
+; CHECK-NEXT:    lg %r13, 32(%r1)
+; CHECK-NEXT:    lh %r3, 6(%r1)
+; CHECK-NEXT:    lb %r2, 4(%r1)
+; CHECK-NEXT:    st %r14, 164(%r15)
+; CHECK-NEXT:    stg %r13, 176(%r15)
+; CHECK-NEXT:    la %r6, 168(%r15)
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r6, %r15, 232(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call4:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r6, %r15, 48(%r15)
+; VECTOR-NEXT:    .cfi_offset %r6, -112
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -184
+; VECTOR-NEXT:    .cfi_def_cfa_offset 344
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    lh %r3, 6(%r1)
+; VECTOR-NEXT:    lb %r2, 4(%r1)
+; VECTOR-NEXT:    lb %r0, 40(%r1)
+; VECTOR-NEXT:    lg %r5, 16(%r1)
+; VECTOR-NEXT:    l %r4, 8(%r1)
+; VECTOR-NEXT:    lde %f0, 0(%r1)
+; VECTOR-NEXT:    vl %v1, 24(%r1), 3
+; VECTOR-NEXT:    la %r6, 168(%r15)
+; VECTOR-NEXT:    st %r0, 164(%r15)
+; VECTOR-NEXT:    vst %v1, 168(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r6, %r15, 232(%r15)
 ; VECTOR-NEXT:    br %r14
-  %V = call %Ty7 @foo7()
-  %Val0 = extractvalue %Ty7 %V, 0
-  %Val1 = extractvalue %Ty7 %V, 1
-  %Add = add i128 %Val0, %Val1
-  %Res = insertvalue %Ty7 %V, i128 %Add, 0
-  ret %Ty7 %Res
+  %L = load %Ty4, ptr @Src
+  call void @Fnptr(%Ty4 %L)
+  ret void
 }
 
-%Ty8 = type {float, [2 x i128], i32}
-declare fastcc %Ty8 @foo8()
-define fastcc %Ty8 @fun8() {
-; CHECK-LABEL: fun8:
+define %Ty4 @ret4() {
+; CHECK-LABEL: ret4:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
 ; CHECK-NEXT:    .cfi_offset %r13, -56
@@ -479,24 +568,27 @@ define fastcc %Ty8 @fun8() {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 368
 ; CHECK-NEXT:    lgr %r13, %r2
 ; CHECK-NEXT:    la %r2, 160(%r15)
-; CHECK-NEXT:    brasl %r14, foo8 at PLT
-; CHECK-NEXT:    lg %r0, 176(%r15)
-; CHECK-NEXT:    lg %r1, 168(%r15)
-; CHECK-NEXT:    le %f0, 160(%r15)
-; CHECK-NEXT:    lhi %r2, 1
-; CHECK-NEXT:    a %r2, 200(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lb %r0, 164(%r15)
+; CHECK-NEXT:    lh %r1, 166(%r15)
+; CHECK-NEXT:    lg %r2, 192(%r15)
 ; CHECK-NEXT:    lg %r3, 184(%r15)
-; CHECK-NEXT:    lg %r4, 192(%r15)
+; CHECK-NEXT:    le %f0, 160(%r15)
+; CHECK-NEXT:    l %r4, 168(%r15)
+; CHECK-NEXT:    lg %r5, 176(%r15)
+; CHECK-NEXT:    lb %r14, 200(%r15)
 ; CHECK-NEXT:    ste %f0, 0(%r13)
-; CHECK-NEXT:    st %r2, 40(%r13)
+; CHECK-NEXT:    st %r4, 8(%r13)
+; CHECK-NEXT:    stg %r5, 16(%r13)
+; CHECK-NEXT:    stc %r14, 40(%r13)
 ; CHECK-NEXT:    stg %r3, 24(%r13)
-; CHECK-NEXT:    stg %r4, 32(%r13)
-; CHECK-NEXT:    stg %r1, 8(%r13)
-; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    stg %r2, 32(%r13)
+; CHECK-NEXT:    sth %r1, 6(%r13)
+; CHECK-NEXT:    stc %r0, 4(%r13)
 ; CHECK-NEXT:    lmg %r13, %r15, 312(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun8:
+; VECTOR-LABEL: ret4:
 ; VECTOR:       # %bb.0:
 ; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
 ; VECTOR-NEXT:    .cfi_offset %r13, -56
@@ -506,45 +598,309 @@ define fastcc %Ty8 @fun8() {
 ; VECTOR-NEXT:    .cfi_def_cfa_offset 368
 ; VECTOR-NEXT:    lgr %r13, %r2
 ; VECTOR-NEXT:    la %r2, 160(%r15)
-; VECTOR-NEXT:    brasl %r14, foo8 at PLT
-; VECTOR-NEXT:    lhi %r0, 1
-; VECTOR-NEXT:    a %r0, 200(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lb %r0, 164(%r15)
+; VECTOR-NEXT:    lh %r1, 166(%r15)
+; VECTOR-NEXT:    lb %r4, 200(%r15)
 ; VECTOR-NEXT:    lde %f0, 160(%r15)
-; VECTOR-NEXT:    vl %v1, 168(%r15), 3
-; VECTOR-NEXT:    vl %v2, 184(%r15), 3
-; VECTOR-NEXT:    st %r0, 40(%r13)
-; VECTOR-NEXT:    vst %v2, 24(%r13), 3
-; VECTOR-NEXT:    vst %v1, 8(%r13), 3
+; VECTOR-NEXT:    l %r2, 168(%r15)
+; VECTOR-NEXT:    lg %r3, 176(%r15)
+; VECTOR-NEXT:    vl %v1, 184(%r15), 3
+; VECTOR-NEXT:    stc %r4, 40(%r13)
+; VECTOR-NEXT:    vst %v1, 24(%r13), 3
+; VECTOR-NEXT:    stg %r3, 16(%r13)
+; VECTOR-NEXT:    st %r2, 8(%r13)
+; VECTOR-NEXT:    sth %r1, 6(%r13)
+; VECTOR-NEXT:    stc %r0, 4(%r13)
 ; VECTOR-NEXT:    ste %f0, 0(%r13)
 ; VECTOR-NEXT:    lmg %r13, %r15, 312(%r15)
 ; VECTOR-NEXT:    br %r14
-  %V = call %Ty8 @foo8()
-  %Val2 = extractvalue %Ty8 %V, 2
-  %Add = add i32 %Val2, 1
-  %Res = insertvalue %Ty8 %V, i32 %Add, 2
-  ret %Ty8 %Res
+  %C = call %Ty4 @Fnptr()
+  ret %Ty4 %C
 }
 
-%Ty9 = type {i72}
-declare fastcc %Ty9 @foo9(%Ty9)
-define fastcc %Ty9 @fun9(%Ty9 %A) {
-; CHECK-LABEL: fun9:
+%Ty5 = type [4 x i128]
+define void @arg5(%Ty5 %A) {
+; CHECK-LABEL: arg5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    aghi %r15, -176
-; CHECK-NEXT:    .cfi_def_cfa_offset 336
-; CHECK-NEXT:    lg %r0, 8(%r2)
-; CHECK-NEXT:    lg %r1, 0(%r2)
-; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    lg %r0, 0(%r2)
+; CHECK-NEXT:    lg %r1, 8(%r2)
+; CHECK-NEXT:    lg %r2, 0(%r3)
+; CHECK-NEXT:    lg %r3, 8(%r3)
+; CHECK-NEXT:    lg %r14, 8(%r5)
+; CHECK-NEXT:    lgrl %r13, Dst at GOT
+; CHECK-NEXT:    lg %r5, 0(%r5)
+; CHECK-NEXT:    lg %r12, 8(%r4)
+; CHECK-NEXT:    lg %r4, 0(%r4)
+; CHECK-NEXT:    stg %r14, 56(%r13)
+; CHECK-NEXT:    stg %r5, 48(%r13)
+; CHECK-NEXT:    stg %r12, 40(%r13)
+; CHECK-NEXT:    stg %r4, 32(%r13)
+; CHECK-NEXT:    stg %r3, 24(%r13)
+; CHECK-NEXT:    stg %r2, 16(%r13)
+; CHECK-NEXT:    stg %r1, 8(%r13)
+; CHECK-NEXT:    stg %r0, 0(%r13)
+; CHECK-NEXT:    lmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg5:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    vl %v1, 0(%r3), 3
+; VECTOR-NEXT:    vl %v2, 0(%r4), 3
+; VECTOR-NEXT:    vl %v3, 0(%r5), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vst %v3, 48(%r1), 3
+; VECTOR-NEXT:    vst %v2, 32(%r1), 3
+; VECTOR-NEXT:    vst %v1, 16(%r1), 3
+; VECTOR-NEXT:    vst %v0, 0(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty5 %A, ptr @Dst
+  ret void
+}
+
+define void @call5() {
+; CHECK-LABEL: call5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -224
+; CHECK-NEXT:    .cfi_def_cfa_offset 384
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r0, 0(%r1)
+; CHECK-NEXT:    lg %r2, 8(%r1)
+; CHECK-NEXT:    lg %r3, 16(%r1)
+; CHECK-NEXT:    lg %r4, 24(%r1)
+; CHECK-NEXT:    lg %r5, 56(%r1)
+; CHECK-NEXT:    lg %r14, 48(%r1)
+; CHECK-NEXT:    lg %r13, 40(%r1)
+; CHECK-NEXT:    lg %r1, 32(%r1)
+; CHECK-NEXT:    stg %r5, 168(%r15)
+; CHECK-NEXT:    stg %r14, 160(%r15)
+; CHECK-NEXT:    stg %r13, 184(%r15)
+; CHECK-NEXT:    stg %r1, 176(%r15)
+; CHECK-NEXT:    stg %r4, 200(%r15)
+; CHECK-NEXT:    stg %r3, 192(%r15)
+; CHECK-NEXT:    stg %r2, 216(%r15)
+; CHECK-NEXT:    la %r2, 208(%r15)
+; CHECK-NEXT:    la %r3, 192(%r15)
+; CHECK-NEXT:    la %r4, 176(%r15)
+; CHECK-NEXT:    la %r5, 160(%r15)
+; CHECK-NEXT:    stg %r0, 208(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call5:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -224
+; VECTOR-NEXT:    .cfi_def_cfa_offset 384
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    vl %v0, 0(%r1), 3
+; VECTOR-NEXT:    vl %v1, 16(%r1), 3
+; VECTOR-NEXT:    vl %v2, 32(%r1), 3
+; VECTOR-NEXT:    vl %v3, 48(%r1), 3
+; VECTOR-NEXT:    la %r2, 208(%r15)
+; VECTOR-NEXT:    la %r3, 192(%r15)
+; VECTOR-NEXT:    la %r4, 176(%r15)
+; VECTOR-NEXT:    la %r5, 160(%r15)
+; VECTOR-NEXT:    vst %v3, 160(%r15), 3
+; VECTOR-NEXT:    vst %v2, 176(%r15), 3
+; VECTOR-NEXT:    vst %v1, 192(%r15), 3
+; VECTOR-NEXT:    vst %v0, 208(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty5, ptr @Src
+  call void @Fnptr(%Ty5 %L)
+  ret void
+}
+
+define %Ty5 @ret5() {
+; CHECK-LABEL: ret5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -224
+; CHECK-NEXT:    .cfi_def_cfa_offset 384
+; CHECK-NEXT:    lgr %r13, %r2
 ; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lg %r0, 168(%r15)
+; CHECK-NEXT:    lg %r1, 160(%r15)
+; CHECK-NEXT:    lg %r2, 184(%r15)
+; CHECK-NEXT:    lg %r3, 176(%r15)
+; CHECK-NEXT:    lg %r4, 208(%r15)
+; CHECK-NEXT:    lg %r5, 216(%r15)
+; CHECK-NEXT:    lg %r14, 192(%r15)
+; CHECK-NEXT:    lg %r12, 200(%r15)
+; CHECK-NEXT:    stg %r4, 48(%r13)
+; CHECK-NEXT:    stg %r5, 56(%r13)
+; CHECK-NEXT:    stg %r14, 32(%r13)
+; CHECK-NEXT:    stg %r12, 40(%r13)
+; CHECK-NEXT:    stg %r3, 16(%r13)
+; CHECK-NEXT:    stg %r2, 24(%r13)
+; CHECK-NEXT:    stg %r1, 0(%r13)
+; CHECK-NEXT:    stg %r0, 8(%r13)
+; CHECK-NEXT:    lmg %r12, %r15, 320(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: ret5:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT:    .cfi_offset %r13, -56
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -224
+; VECTOR-NEXT:    .cfi_def_cfa_offset 384
+; VECTOR-NEXT:    lgr %r13, %r2
+; VECTOR-NEXT:    la %r2, 160(%r15)
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    vl %v0, 160(%r15), 3
+; VECTOR-NEXT:    vl %v1, 176(%r15), 3
+; VECTOR-NEXT:    vl %v2, 192(%r15), 3
+; VECTOR-NEXT:    vl %v3, 208(%r15), 3
+; VECTOR-NEXT:    vst %v3, 48(%r13), 3
+; VECTOR-NEXT:    vst %v2, 32(%r13), 3
+; VECTOR-NEXT:    vst %v1, 16(%r13), 3
+; VECTOR-NEXT:    vst %v0, 0(%r13), 3
+; VECTOR-NEXT:    lmg %r13, %r15, 328(%r15)
+; VECTOR-NEXT:    br %r14
+  %C = call %Ty5 @Fnptr()
+  ret %Ty5 %C
+}
+
+%Ty6 = type [2 x i72]
+define void @arg6(%Ty6 %A) {
+; CHECK-LABEL: arg6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r3)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r4, 8(%r2)
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    stc %r0, 24(%r1)
+; CHECK-NEXT:    stc %r4, 8(%r1)
+; CHECK-NEXT:    sllg %r3, %r3, 56
+; CHECK-NEXT:    rosbg %r3, %r0, 8, 63, 56
+; CHECK-NEXT:    stg %r3, 16(%r1)
+; CHECK-NEXT:    sllg %r0, %r2, 56
+; CHECK-NEXT:    rosbg %r0, %r4, 8, 63, 56
+; CHECK-NEXT:    stg %r0, 0(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg6:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r2), 3
+; VECTOR-NEXT:    vl %v1, 0(%r3), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vsteb %v1, 24(%r1), 15
+; VECTOR-NEXT:    vrepib %v2, 8
+; VECTOR-NEXT:    vsteb %v0, 8(%r1), 15
+; VECTOR-NEXT:    vsrlb %v1, %v1, %v2
+; VECTOR-NEXT:    vsrlb %v0, %v0, %v2
+; VECTOR-NEXT:    vsteg %v1, 16(%r1), 1
+; VECTOR-NEXT:    vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT:    br %r14
+  store %Ty6 %A, ptr @Dst
+  ret void
+}
+
+define void @call6() {
+; CHECK-LABEL: call6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r0, 0(%r1)
+; CHECK-NEXT:    lg %r2, 16(%r1)
+; CHECK-NEXT:    sllg %r3, %r0, 8
+; CHECK-NEXT:    sllg %r4, %r2, 8
+; CHECK-NEXT:    ic %r4, 24(%r1)
+; CHECK-NEXT:    ic %r3, 8(%r1)
+; CHECK-NEXT:    srlg %r0, %r0, 56
+; CHECK-NEXT:    srlg %r1, %r2, 56
+; CHECK-NEXT:    stg %r4, 168(%r15)
 ; CHECK-NEXT:    stg %r1, 160(%r15)
-; CHECK-NEXT:    brasl %r14, foo9 at PLT
-; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT:    stg %r3, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r0, 176(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call6:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    vgbm %v1, 0
+; VECTOR-NEXT:    vleb %v1, 8(%r1), 15
+; VECTOR-NEXT:    vlrepg %v2, 0(%r1)
+; VECTOR-NEXT:    vrepib %v3, 8
+; VECTOR-NEXT:    vslb %v2, %v2, %v3
+; VECTOR-NEXT:    vgbm %v0, 0
+; VECTOR-NEXT:    vo %v1, %v1, %v2
+; VECTOR-NEXT:    vleb %v0, 24(%r1), 15
+; VECTOR-NEXT:    vlrepg %v2, 16(%r1)
+; VECTOR-NEXT:    vslb %v2, %v2, %v3
+; VECTOR-NEXT:    vo %v0, %v0, %v2
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v0, 160(%r15), 3
+; VECTOR-NEXT:    vst %v1, 176(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty6, ptr @Src
+  call void @Fnptr(%Ty6 %L)
+  ret void
+}
+
+define %Ty6 @ret6() {
+; CHECK-LABEL: ret6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lg %r0, 176(%r15)
+; CHECK-NEXT:    llgc %r1, 184(%r15)
+; CHECK-NEXT:    lg %r2, 160(%r15)
+; CHECK-NEXT:    llgc %r3, 168(%r15)
+; CHECK-NEXT:    stg %r0, 16(%r13)
+; CHECK-NEXT:    stc %r1, 24(%r13)
+; CHECK-NEXT:    stg %r2, 0(%r13)
+; CHECK-NEXT:    stc %r3, 8(%r13)
+; CHECK-NEXT:    lmg %r13, %r15, 296(%r15)
 ; CHECK-NEXT:    br %r14
 ;
-; VECTOR-LABEL: fun9:
+; VECTOR-LABEL: ret6:
 ; VECTOR:       # %bb.0:
 ; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
 ; VECTOR-NEXT:    .cfi_offset %r13, -56
@@ -552,19 +908,97 @@ define fastcc %Ty9 @fun9(%Ty9 %A) {
 ; VECTOR-NEXT:    .cfi_offset %r15, -40
 ; VECTOR-NEXT:    aghi %r15, -192
 ; VECTOR-NEXT:    .cfi_def_cfa_offset 352
-; VECTOR-NEXT:    vl %v0, 0(%r3), 3
 ; VECTOR-NEXT:    lgr %r13, %r2
 ; VECTOR-NEXT:    la %r2, 160(%r15)
-; VECTOR-NEXT:    la %r3, 176(%r15)
-; VECTOR-NEXT:    vst %v0, 176(%r15), 3
-; VECTOR-NEXT:    brasl %r14, foo9 at PLT
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
 ; VECTOR-NEXT:    vgbm %v0, 0
-; VECTOR-NEXT:    vleb %v0, 168(%r15), 15
-; VECTOR-NEXT:    vlrepg %v1, 160(%r15)
-; VECTOR-NEXT:    vsteg %v1, 0(%r13), 1
-; VECTOR-NEXT:    vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT:    vgbm %v1, 0
+; VECTOR-NEXT:    vleb %v1, 168(%r15), 15
+; VECTOR-NEXT:    vleb %v0, 184(%r15), 15
+; VECTOR-NEXT:    vlrepg %v2, 160(%r15)
+; VECTOR-NEXT:    vlrepg %v3, 176(%r15)
+; VECTOR-NEXT:    vsteg %v3, 16(%r13), 1
+; VECTOR-NEXT:    vsteb %v0, 24(%r13), 15
+; VECTOR-NEXT:    vsteg %v2, 0(%r13), 1
+; VECTOR-NEXT:    vsteb %v1, 8(%r13), 15
 ; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
 ; VECTOR-NEXT:    br %r14
-  %Res = call %Ty9 @foo9(%Ty9 %A)
-  ret %Ty9 %Res
+  %C = call %Ty6 @Fnptr()
+  ret %Ty6 %C
+}
+
+%Ty7 = type {i128}
+define void @arg7(%Ty7 %A, %Ty7 %B) {
+; CHECK-LABEL: arg7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lg %r0, 8(%r2)
+; CHECK-NEXT:    lgrl %r1, Dst at GOT
+; CHECK-NEXT:    lg %r2, 0(%r2)
+; CHECK-NEXT:    lg %r4, 8(%r3)
+; CHECK-NEXT:    lg %r3, 0(%r3)
+; CHECK-NEXT:    stg %r0, 8(%r1)
+; CHECK-NEXT:    stg %r2, 0(%r1)
+; CHECK-NEXT:    stg %r4, 24(%r1)
+; CHECK-NEXT:    stg %r3, 16(%r1)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: arg7:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vl %v0, 0(%r3), 3
+; VECTOR-NEXT:    vl %v1, 0(%r2), 3
+; VECTOR-NEXT:    lgrl %r1, Dst at GOT
+; VECTOR-NEXT:    vst %v1, 0(%r1), 3
+; VECTOR-NEXT:    vst %v0, 16(%r1), 3
+; VECTOR-NEXT:    br %r14
+  store %Ty7 %A, ptr @Dst
+  %D2 = getelementptr %Ty7, ptr @Dst, i32 1
+  store %Ty7 %B, ptr %D2
+  ret void
+}
+
+define void @call7() {
+; CHECK-LABEL: call7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
+; CHECK-NEXT:    lgrl %r1, Src at GOT
+; CHECK-NEXT:    lg %r0, 24(%r1)
+; CHECK-NEXT:    lg %r2, 16(%r1)
+; CHECK-NEXT:    lg %r3, 8(%r1)
+; CHECK-NEXT:    lg %r1, 0(%r1)
+; CHECK-NEXT:    stg %r0, 168(%r15)
+; CHECK-NEXT:    stg %r2, 160(%r15)
+; CHECK-NEXT:    stg %r3, 184(%r15)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    la %r3, 160(%r15)
+; CHECK-NEXT:    stg %r1, 176(%r15)
+; CHECK-NEXT:    brasl %r14, Fnptr at PLT
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT:    br %r14
+;
+; VECTOR-LABEL: call7:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT:    .cfi_offset %r14, -48
+; VECTOR-NEXT:    .cfi_offset %r15, -40
+; VECTOR-NEXT:    aghi %r15, -192
+; VECTOR-NEXT:    .cfi_def_cfa_offset 352
+; VECTOR-NEXT:    lgrl %r1, Src at GOT
+; VECTOR-NEXT:    vl %v0, 0(%r1), 3
+; VECTOR-NEXT:    vl %v1, 16(%r1), 3
+; VECTOR-NEXT:    la %r2, 176(%r15)
+; VECTOR-NEXT:    la %r3, 160(%r15)
+; VECTOR-NEXT:    vst %v1, 160(%r15), 3
+; VECTOR-NEXT:    vst %v0, 176(%r15), 3
+; VECTOR-NEXT:    brasl %r14, Fnptr at PLT
+; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT:    br %r14
+  %L = load %Ty7, ptr @Src
+  %S2 = getelementptr %Ty7, ptr @Src, i32 1
+  %L2 = load %Ty7, ptr %S2
+  call void @Fnptr(%Ty7 %L, %Ty7 %L2)
+  ret void
 }



More information about the llvm-commits mailing list