[llvm] [SystemZ] Handle IR struct arguments correctly. (PR #169583)
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 17:35:47 PST 2025
https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/169583
>From d78732afcba4174a23f419e1a6c5022ca6f6d4fc Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Tue, 25 Nov 2025 22:36:51 +0100
Subject: [PATCH 1/5] Handle IR struct args correctly.
---
.../Target/SystemZ/SystemZISelLowering.cpp | 78 +++--
llvm/test/CodeGen/SystemZ/args-22.ll | 278 ++++++++++++++++++
2 files changed, 331 insertions(+), 25 deletions(-)
create mode 100644 llvm/test/CodeGen/SystemZ/args-22.ll
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index eb93024bed35c..ab22c0efa8454 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1970,6 +1970,28 @@ SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
return SDValue();
}
+// If index I in Args (and ArgLocs) is the first part of a split argument, set
+// PartVT to the part type and NumParts to the part count and return true.
+template <class ArgTy>
+static bool analyzeArgSplit(const SmallVectorImpl<ArgTy> &Args,
+ SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
+ MVT &PartVT, unsigned &NumParts) {
+ if (!Args[I].Flags.isSplit())
+ return false;
+ assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
+ "ArgLocs havoc.");
+ PartVT = ArgLocs[I].getValVT();
+ NumParts = 1;
+ for (unsigned PartIdx = I + 1;; ++PartIdx) {
+ assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
+ assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
+ ++NumParts;
+ if (Args[PartIdx].Flags.isSplitEnd())
+ break;
+ }
+ return true;
+}
+
SDValue SystemZTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
@@ -2074,16 +2096,26 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
MachinePointerInfo()));
// If the original argument was split (e.g. i128), we need
// to load all parts of it here (using the same address).
- unsigned ArgIndex = Ins[I].OrigArgIndex;
- assert (Ins[I].PartOffset == 0);
- while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
- CCValAssign &PartVA = ArgLocs[I + 1];
- unsigned PartOffset = Ins[I + 1].PartOffset;
- SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
- DAG.getIntPtrConstant(PartOffset, DL));
- InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
- MachinePointerInfo()));
- ++I;
+ MVT PartVT;
+ unsigned NumParts;
+ if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
+ // TODO: It is strange that LowerCallTo() sets PartOffset relative to
+ // the first split part, while LowerArguments() sets it relative to the
+ // beginning of the struct. So with {i32, i256}, the PartOffsets of the
+ // i256 parts are handled differently. Try to remove that difference
+ // and use PartOffset directly here (instead of subtracting
+ // SplitBaseOffs).
+ unsigned SplitBaseOffs = Ins[I].PartOffset;
+ for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
+ ++I;
+ CCValAssign &PartVA = ArgLocs[I];
+ unsigned PartOffset = Ins[I].PartOffset - SplitBaseOffs;
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+ DAG.getIntPtrConstant(PartOffset, DL));
+ InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
+ MachinePointerInfo()));
+ assert(PartOffset && "Offset should be non-zero.");
+ }
}
} else
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
@@ -2319,18 +2351,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
- unsigned ArgIndex = Outs[I].OrigArgIndex;
EVT SlotVT;
- if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
- // Allocate the full stack space for a promoted (and split) argument.
- Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
- EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
- MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
- unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
- SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
- } else {
+ MVT PartVT;
+ unsigned NumParts = 1;
+ if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
+ SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
+ else
SlotVT = Outs[I].VT;
- }
SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
MemOpChains.push_back(
@@ -2338,18 +2365,19 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachinePointerInfo::getFixedStack(MF, FI)));
// If the original argument was split (e.g. i128), we need
// to store all parts of it here (and pass just one address).
- assert (Outs[I].PartOffset == 0);
- while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
- SDValue PartValue = OutVals[I + 1];
- unsigned PartOffset = Outs[I + 1].PartOffset;
+ assert(Outs[I].PartOffset == 0);
+ for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
+ ++I;
+ SDValue PartValue = OutVals[I];
+ unsigned PartOffset = Outs[I].PartOffset;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
DAG.getIntPtrConstant(PartOffset, DL));
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
+ assert(PartOffset && "Offset should be non-zero.");
assert((PartOffset + PartValue.getValueType().getStoreSize() <=
SlotVT.getStoreSize()) && "Not enough space for argument part!");
- ++I;
}
ArgValue = SpillSlot;
} else
diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
new file mode 100644
index 0000000000000..b920bb4847061
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -0,0 +1,278 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
+;
+; Test passing IR struct arguments, which do not adhere to the ABI but are
+; split up with each element passed like a separate argument.
+
+%Ty0 = type {i128}
+define fastcc void @fun0(%Ty0 %A) {
+; CHECK-LABEL: fun0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lg %r0, 8(%r2)
+; CHECK-NEXT: lg %r1, 0(%r2)
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: stg %r1, 160(%r15)
+; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun0:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -176
+; VECTOR-NEXT: .cfi_def_cfa_offset 336
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: vst %v0, 160(%r15), 3
+; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: lmg %r14, %r15, 288(%r15)
+; VECTOR-NEXT: br %r14
+ call void undef(%Ty0 %A)
+ ret void
+}
+
+%Ty1 = type {i128, i128}
+define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
+; CHECK-LABEL: fun1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -224
+; CHECK-NEXT: .cfi_def_cfa_offset 384
+; CHECK-NEXT: lg %r0, 0(%r2)
+; CHECK-NEXT: lg %r1, 8(%r2)
+; CHECK-NEXT: lg %r2, 0(%r3)
+; CHECK-NEXT: lg %r3, 8(%r3)
+; CHECK-NEXT: lg %r14, 8(%r5)
+; CHECK-NEXT: lg %r5, 0(%r5)
+; CHECK-NEXT: lg %r13, 8(%r4)
+; CHECK-NEXT: lg %r4, 0(%r4)
+; CHECK-NEXT: stg %r14, 168(%r15)
+; CHECK-NEXT: stg %r5, 160(%r15)
+; CHECK-NEXT: stg %r13, 184(%r15)
+; CHECK-NEXT: stg %r4, 176(%r15)
+; CHECK-NEXT: stg %r3, 200(%r15)
+; CHECK-NEXT: stg %r2, 192(%r15)
+; CHECK-NEXT: stg %r1, 216(%r15)
+; CHECK-NEXT: la %r2, 208(%r15)
+; CHECK-NEXT: la %r3, 192(%r15)
+; CHECK-NEXT: la %r4, 176(%r15)
+; CHECK-NEXT: la %r5, 160(%r15)
+; CHECK-NEXT: stg %r0, 208(%r15)
+; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun1:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -224
+; VECTOR-NEXT: .cfi_def_cfa_offset 384
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: vl %v1, 0(%r3), 3
+; VECTOR-NEXT: vl %v2, 0(%r4), 3
+; VECTOR-NEXT: vl %v3, 0(%r5), 3
+; VECTOR-NEXT: la %r2, 208(%r15)
+; VECTOR-NEXT: la %r3, 192(%r15)
+; VECTOR-NEXT: la %r4, 176(%r15)
+; VECTOR-NEXT: la %r5, 160(%r15)
+; VECTOR-NEXT: vst %v3, 160(%r15), 3
+; VECTOR-NEXT: vst %v2, 176(%r15), 3
+; VECTOR-NEXT: vst %v1, 192(%r15), 3
+; VECTOR-NEXT: vst %v0, 208(%r15), 3
+; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT: br %r14
+ call void undef(%Ty1 %A, %Ty1 %B)
+ ret void
+}
+
+%Ty2 = type {i256}
+define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
+; CHECK-LABEL: fun2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -224
+; CHECK-NEXT: .cfi_def_cfa_offset 384
+; CHECK-NEXT: lg %r0, 0(%r2)
+; CHECK-NEXT: lg %r1, 8(%r2)
+; CHECK-NEXT: lg %r4, 16(%r2)
+; CHECK-NEXT: lg %r2, 24(%r2)
+; CHECK-NEXT: lg %r5, 24(%r3)
+; CHECK-NEXT: lg %r14, 16(%r3)
+; CHECK-NEXT: lg %r13, 8(%r3)
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: stg %r5, 184(%r15)
+; CHECK-NEXT: stg %r14, 176(%r15)
+; CHECK-NEXT: stg %r13, 168(%r15)
+; CHECK-NEXT: stg %r3, 160(%r15)
+; CHECK-NEXT: stg %r2, 216(%r15)
+; CHECK-NEXT: stg %r4, 208(%r15)
+; CHECK-NEXT: stg %r1, 200(%r15)
+; CHECK-NEXT: la %r2, 192(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r0, 192(%r15)
+; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun2:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -224
+; VECTOR-NEXT: .cfi_def_cfa_offset 384
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: vl %v1, 16(%r2), 3
+; VECTOR-NEXT: vl %v2, 0(%r3), 3
+; VECTOR-NEXT: vl %v3, 16(%r3), 3
+; VECTOR-NEXT: la %r2, 192(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v3, 176(%r15), 3
+; VECTOR-NEXT: vst %v2, 160(%r15), 3
+; VECTOR-NEXT: vst %v1, 208(%r15), 3
+; VECTOR-NEXT: vst %v0, 192(%r15), 3
+; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT: br %r14
+ call void undef(%Ty2 %A, %Ty2 %B)
+ ret void
+}
+
+%Ty3 = type {float, i256, i32, i128, i8}
+define fastcc void @fun3(%Ty3 %A) {
+; CHECK-LABEL: fun3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -208
+; CHECK-NEXT: .cfi_def_cfa_offset 368
+; CHECK-NEXT: lg %r0, 0(%r2)
+; CHECK-NEXT: lg %r1, 8(%r2)
+; CHECK-NEXT: lg %r14, 16(%r2)
+; CHECK-NEXT: lg %r2, 24(%r2)
+; CHECK-NEXT: lg %r13, 0(%r4)
+; CHECK-NEXT: lg %r4, 8(%r4)
+; CHECK-NEXT: stc %r5, 64
+; CHECK-NEXT: st %r3, 40
+; CHECK-NEXT: ste %f0, 0
+; CHECK-NEXT: stg %r4, 56
+; CHECK-NEXT: stg %r13, 48
+; CHECK-NEXT: stg %r2, 32
+; CHECK-NEXT: stg %r14, 24
+; CHECK-NEXT: stg %r1, 16
+; CHECK-NEXT: stg %r0, 8
+; CHECK-NEXT: stg %r4, 168(%r15)
+; CHECK-NEXT: stg %r13, 160(%r15)
+; CHECK-NEXT: stg %r2, 200(%r15)
+; CHECK-NEXT: stg %r14, 192(%r15)
+; CHECK-NEXT: stg %r1, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r4, 160(%r15)
+; CHECK-NEXT: stg %r0, 176(%r15)
+; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: lmg %r13, %r15, 312(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun3:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -208
+; VECTOR-NEXT: .cfi_def_cfa_offset 368
+; VECTOR-NEXT: vl %v1, 0(%r4), 3
+; VECTOR-NEXT: vl %v2, 0(%r2), 3
+; VECTOR-NEXT: vl %v3, 16(%r2), 3
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r4, 160(%r15)
+; VECTOR-NEXT: stc %r5, 64
+; VECTOR-NEXT: st %r3, 40
+; VECTOR-NEXT: ste %f0, 0
+; VECTOR-NEXT: vst %v3, 24, 3
+; VECTOR-NEXT: vst %v2, 8, 3
+; VECTOR-NEXT: vst %v1, 48, 3
+; VECTOR-NEXT: vst %v1, 160(%r15), 3
+; VECTOR-NEXT: vst %v3, 192(%r15), 3
+; VECTOR-NEXT: vst %v2, 176(%r15), 3
+; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: lmg %r14, %r15, 320(%r15)
+; VECTOR-NEXT: br %r14
+ store %Ty3 %A, ptr null
+ call void undef(%Ty3 %A)
+ ret void
+}
+
+%Ty4 = type {i72, i128}
+define fastcc void @fun4(%Ty4 %A) {
+; CHECK-LABEL: fun4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lg %r0, 8(%r3)
+; CHECK-NEXT: lg %r1, 0(%r3)
+; CHECK-NEXT: lg %r3, 8(%r2)
+; CHECK-NEXT: lg %r4, 0(%r2)
+; CHECK-NEXT: stg %r0, 24
+; CHECK-NEXT: stg %r1, 16
+; CHECK-NEXT: stc %r3, 8
+; CHECK-NEXT: sllg %r2, %r4, 56
+; CHECK-NEXT: rosbg %r2, %r3, 8, 63, 56
+; CHECK-NEXT: stg %r2, 0
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: stg %r1, 160(%r15)
+; CHECK-NEXT: stg %r3, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r4, 176(%r15)
+; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun4:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: vl %v1, 0(%r2), 3
+; VECTOR-NEXT: vl %v0, 0(%r3), 3
+; VECTOR-NEXT: vsteb %v1, 8, 15
+; VECTOR-NEXT: vrepib %v2, 8
+; VECTOR-NEXT: vsrlb %v2, %v1, %v2
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v0, 16, 3
+; VECTOR-NEXT: vsteg %v2, 0, 1
+; VECTOR-NEXT: vst %v0, 160(%r15), 3
+; VECTOR-NEXT: vst %v1, 176(%r15), 3
+; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: br %r14
+ store %Ty4 %A, ptr null
+ call void undef(%Ty4 %A)
+ ret void
+}
>From 8d5fc621a64512a527e46973d3d4fda29a1636a2 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Wed, 26 Nov 2025 00:40:49 +0100
Subject: [PATCH 2/5] Add test case for returning.
---
llvm/test/CodeGen/SystemZ/args-22.ll | 43 ++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
index b920bb4847061..39f7ff5dffe4b 100644
--- a/llvm/test/CodeGen/SystemZ/args-22.ll
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -276,3 +276,46 @@ define fastcc void @fun4(%Ty4 %A) {
call void undef(%Ty4 %A)
ret void
}
+
+%Ty5 = type {i128, i128}
+declare fastcc %Ty5 @foo5()
+define fastcc void @fun5() {
+; CHECK-LABEL: fun5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, foo5@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: lg %r1, 184(%r15)
+; CHECK-NEXT: lg %r2, 160(%r15)
+; CHECK-NEXT: lg %r3, 168(%r15)
+; CHECK-NEXT: stg %r0, 16
+; CHECK-NEXT: stg %r1, 24
+; CHECK-NEXT: stg %r2, 0
+; CHECK-NEXT: stg %r3, 8
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun5:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, foo5@PLT
+; VECTOR-NEXT: vl %v0, 160(%r15), 3
+; VECTOR-NEXT: vl %v1, 176(%r15), 3
+; VECTOR-NEXT: vst %v1, 16, 3
+; VECTOR-NEXT: vst %v0, 0, 3
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: br %r14
+ %A = call %Ty5 @foo5()
+ store %Ty5 %A, ptr null
+ ret void
+}
>From 120a9a7befc87828c61a853ba9648675b908b392 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Wed, 26 Nov 2025 01:23:33 +0100
Subject: [PATCH 3/5] Avoid undef calls in tests.
---
llvm/test/CodeGen/SystemZ/args-22.ll | 32 +++++++++++++++-------------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
index 39f7ff5dffe4b..021a7536dae3e 100644
--- a/llvm/test/CodeGen/SystemZ/args-22.ll
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -5,6 +5,8 @@
; Test passing IR struct arguments, which do not adhere to the ABI but are
; split up with each element passed like a separate argument.
+@fnptr = external global ptr
+
%Ty0 = type {i128}
define fastcc void @fun0(%Ty0 %A) {
; CHECK-LABEL: fun0:
@@ -19,7 +21,7 @@ define fastcc void @fun0(%Ty0 %A) {
; CHECK-NEXT: stg %r0, 168(%r15)
; CHECK-NEXT: la %r2, 160(%r15)
; CHECK-NEXT: stg %r1, 160(%r15)
-; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: brasl %r14, fnptr@PLT
; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
; CHECK-NEXT: br %r14
;
@@ -33,10 +35,10 @@ define fastcc void @fun0(%Ty0 %A) {
; VECTOR-NEXT: vl %v0, 0(%r2), 3
; VECTOR-NEXT: la %r2, 160(%r15)
; VECTOR-NEXT: vst %v0, 160(%r15), 3
-; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: brasl %r14, fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 288(%r15)
; VECTOR-NEXT: br %r14
- call void undef(%Ty0 %A)
+ call void @fnptr(%Ty0 %A)
ret void
}
@@ -70,7 +72,7 @@ define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
; CHECK-NEXT: la %r4, 176(%r15)
; CHECK-NEXT: la %r5, 160(%r15)
; CHECK-NEXT: stg %r0, 208(%r15)
-; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: brasl %r14, fnptr@PLT
; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
; CHECK-NEXT: br %r14
;
@@ -93,10 +95,10 @@ define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
; VECTOR-NEXT: vst %v2, 176(%r15), 3
; VECTOR-NEXT: vst %v1, 192(%r15), 3
; VECTOR-NEXT: vst %v0, 208(%r15), 3
-; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: brasl %r14, fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
; VECTOR-NEXT: br %r14
- call void undef(%Ty1 %A, %Ty1 %B)
+ call void @fnptr(%Ty1 %A, %Ty1 %B)
ret void
}
@@ -128,7 +130,7 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
; CHECK-NEXT: la %r2, 192(%r15)
; CHECK-NEXT: la %r3, 160(%r15)
; CHECK-NEXT: stg %r0, 192(%r15)
-; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: brasl %r14, fnptr@PLT
; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
; CHECK-NEXT: br %r14
;
@@ -149,10 +151,10 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
; VECTOR-NEXT: vst %v2, 160(%r15), 3
; VECTOR-NEXT: vst %v1, 208(%r15), 3
; VECTOR-NEXT: vst %v0, 192(%r15), 3
-; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: brasl %r14, fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
; VECTOR-NEXT: br %r14
- call void undef(%Ty2 %A, %Ty2 %B)
+ call void @fnptr(%Ty2 %A, %Ty2 %B)
ret void
}
@@ -189,7 +191,7 @@ define fastcc void @fun3(%Ty3 %A) {
; CHECK-NEXT: la %r2, 176(%r15)
; CHECK-NEXT: la %r4, 160(%r15)
; CHECK-NEXT: stg %r0, 176(%r15)
-; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: brasl %r14, fnptr@PLT
; CHECK-NEXT: lmg %r13, %r15, 312(%r15)
; CHECK-NEXT: br %r14
;
@@ -214,11 +216,11 @@ define fastcc void @fun3(%Ty3 %A) {
; VECTOR-NEXT: vst %v1, 160(%r15), 3
; VECTOR-NEXT: vst %v3, 192(%r15), 3
; VECTOR-NEXT: vst %v2, 176(%r15), 3
-; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: brasl %r14, fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 320(%r15)
; VECTOR-NEXT: br %r14
store %Ty3 %A, ptr null
- call void undef(%Ty3 %A)
+ call void @fnptr(%Ty3 %A)
ret void
}
@@ -247,7 +249,7 @@ define fastcc void @fun4(%Ty4 %A) {
; CHECK-NEXT: la %r2, 176(%r15)
; CHECK-NEXT: la %r3, 160(%r15)
; CHECK-NEXT: stg %r4, 176(%r15)
-; CHECK-NEXT: basr %r14, %r1
+; CHECK-NEXT: brasl %r14, fnptr@PLT
; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
; CHECK-NEXT: br %r14
;
@@ -269,11 +271,11 @@ define fastcc void @fun4(%Ty4 %A) {
; VECTOR-NEXT: vsteg %v2, 0, 1
; VECTOR-NEXT: vst %v0, 160(%r15), 3
; VECTOR-NEXT: vst %v1, 176(%r15), 3
-; VECTOR-NEXT: basr %r14, %r1
+; VECTOR-NEXT: brasl %r14, fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
; VECTOR-NEXT: br %r14
store %Ty4 %A, ptr null
- call void undef(%Ty4 %A)
+ call void @fnptr(%Ty4 %A)
ret void
}
>From b08256f11c24d85be32f42f25c787720625b2b2f Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Wed, 26 Nov 2025 23:13:13 +0100
Subject: [PATCH 4/5] Testing improved.
---
llvm/test/CodeGen/SystemZ/args-22.ll | 389 ++++++++++++++++++++++-----
1 file changed, 318 insertions(+), 71 deletions(-)
diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
index 021a7536dae3e..da0f3e08c7a62 100644
--- a/llvm/test/CodeGen/SystemZ/args-22.ll
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -5,7 +5,9 @@
; Test passing IR struct arguments, which do not adhere to the ABI but are
; split up with each element passed like a separate argument.
-@fnptr = external global ptr
+@Fnptr = external global ptr
+@Src = external global ptr
+@Dst = external global ptr
%Ty0 = type {i128}
define fastcc void @fun0(%Ty0 %A) {
@@ -21,7 +23,7 @@ define fastcc void @fun0(%Ty0 %A) {
; CHECK-NEXT: stg %r0, 168(%r15)
; CHECK-NEXT: la %r2, 160(%r15)
; CHECK-NEXT: stg %r1, 160(%r15)
-; CHECK-NEXT: brasl %r14, fnptr@PLT
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
; CHECK-NEXT: br %r14
;
@@ -35,10 +37,10 @@ define fastcc void @fun0(%Ty0 %A) {
; VECTOR-NEXT: vl %v0, 0(%r2), 3
; VECTOR-NEXT: la %r2, 160(%r15)
; VECTOR-NEXT: vst %v0, 160(%r15), 3
-; VECTOR-NEXT: brasl %r14, fnptr@PLT
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 288(%r15)
; VECTOR-NEXT: br %r14
- call void @fnptr(%Ty0 %A)
+ call void @Fnptr(%Ty0 %A)
ret void
}
@@ -72,7 +74,7 @@ define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
; CHECK-NEXT: la %r4, 176(%r15)
; CHECK-NEXT: la %r5, 160(%r15)
; CHECK-NEXT: stg %r0, 208(%r15)
-; CHECK-NEXT: brasl %r14, fnptr@PLT
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
; CHECK-NEXT: br %r14
;
@@ -95,10 +97,10 @@ define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
; VECTOR-NEXT: vst %v2, 176(%r15), 3
; VECTOR-NEXT: vst %v1, 192(%r15), 3
; VECTOR-NEXT: vst %v0, 208(%r15), 3
-; VECTOR-NEXT: brasl %r14, fnptr@PLT
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
; VECTOR-NEXT: br %r14
- call void @fnptr(%Ty1 %A, %Ty1 %B)
+ call void @Fnptr(%Ty1 %A, %Ty1 %B)
ret void
}
@@ -130,7 +132,7 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
; CHECK-NEXT: la %r2, 192(%r15)
; CHECK-NEXT: la %r3, 160(%r15)
; CHECK-NEXT: stg %r0, 192(%r15)
-; CHECK-NEXT: brasl %r14, fnptr@PLT
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
; CHECK-NEXT: br %r14
;
@@ -151,10 +153,10 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
; VECTOR-NEXT: vst %v2, 160(%r15), 3
; VECTOR-NEXT: vst %v1, 208(%r15), 3
; VECTOR-NEXT: vst %v0, 192(%r15), 3
-; VECTOR-NEXT: brasl %r14, fnptr@PLT
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
; VECTOR-NEXT: br %r14
- call void @fnptr(%Ty2 %A, %Ty2 %B)
+ call void @Fnptr(%Ty2 %A, %Ty2 %B)
ret void
}
@@ -162,7 +164,8 @@ define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
define fastcc void @fun3(%Ty3 %A) {
; CHECK-LABEL: fun3:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: .cfi_offset %r12, -64
; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
@@ -171,28 +174,29 @@ define fastcc void @fun3(%Ty3 %A) {
; CHECK-NEXT: lg %r0, 0(%r2)
; CHECK-NEXT: lg %r1, 8(%r2)
; CHECK-NEXT: lg %r14, 16(%r2)
+; CHECK-NEXT: lgrl %r13, Dst@GOT
; CHECK-NEXT: lg %r2, 24(%r2)
-; CHECK-NEXT: lg %r13, 0(%r4)
+; CHECK-NEXT: lg %r12, 0(%r4)
; CHECK-NEXT: lg %r4, 8(%r4)
-; CHECK-NEXT: stc %r5, 64
-; CHECK-NEXT: st %r3, 40
-; CHECK-NEXT: ste %f0, 0
-; CHECK-NEXT: stg %r4, 56
-; CHECK-NEXT: stg %r13, 48
-; CHECK-NEXT: stg %r2, 32
-; CHECK-NEXT: stg %r14, 24
-; CHECK-NEXT: stg %r1, 16
-; CHECK-NEXT: stg %r0, 8
+; CHECK-NEXT: stc %r5, 64(%r13)
+; CHECK-NEXT: st %r3, 40(%r13)
+; CHECK-NEXT: ste %f0, 0(%r13)
+; CHECK-NEXT: stg %r4, 56(%r13)
+; CHECK-NEXT: stg %r12, 48(%r13)
+; CHECK-NEXT: stg %r2, 32(%r13)
+; CHECK-NEXT: stg %r14, 24(%r13)
+; CHECK-NEXT: stg %r1, 16(%r13)
+; CHECK-NEXT: stg %r0, 8(%r13)
; CHECK-NEXT: stg %r4, 168(%r15)
-; CHECK-NEXT: stg %r13, 160(%r15)
+; CHECK-NEXT: stg %r12, 160(%r15)
; CHECK-NEXT: stg %r2, 200(%r15)
; CHECK-NEXT: stg %r14, 192(%r15)
; CHECK-NEXT: stg %r1, 184(%r15)
; CHECK-NEXT: la %r2, 176(%r15)
; CHECK-NEXT: la %r4, 160(%r15)
; CHECK-NEXT: stg %r0, 176(%r15)
-; CHECK-NEXT: brasl %r14, fnptr@PLT
-; CHECK-NEXT: lmg %r13, %r15, 312(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r12, %r15, 304(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: fun3:
@@ -205,22 +209,23 @@ define fastcc void @fun3(%Ty3 %A) {
; VECTOR-NEXT: vl %v1, 0(%r4), 3
; VECTOR-NEXT: vl %v2, 0(%r2), 3
; VECTOR-NEXT: vl %v3, 16(%r2), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
; VECTOR-NEXT: la %r2, 176(%r15)
; VECTOR-NEXT: la %r4, 160(%r15)
-; VECTOR-NEXT: stc %r5, 64
-; VECTOR-NEXT: st %r3, 40
-; VECTOR-NEXT: ste %f0, 0
-; VECTOR-NEXT: vst %v3, 24, 3
-; VECTOR-NEXT: vst %v2, 8, 3
-; VECTOR-NEXT: vst %v1, 48, 3
+; VECTOR-NEXT: stc %r5, 64(%r1)
+; VECTOR-NEXT: st %r3, 40(%r1)
+; VECTOR-NEXT: ste %f0, 0(%r1)
+; VECTOR-NEXT: vst %v3, 24(%r1), 3
+; VECTOR-NEXT: vst %v2, 8(%r1), 3
+; VECTOR-NEXT: vst %v1, 48(%r1), 3
; VECTOR-NEXT: vst %v1, 160(%r15), 3
; VECTOR-NEXT: vst %v3, 192(%r15), 3
; VECTOR-NEXT: vst %v2, 176(%r15), 3
-; VECTOR-NEXT: brasl %r14, fnptr@PLT
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 320(%r15)
; VECTOR-NEXT: br %r14
- store %Ty3 %A, ptr null
- call void @fnptr(%Ty3 %A)
+ store %Ty3 %A, ptr @Dst
+ call void @Fnptr(%Ty3 %A)
ret void
}
@@ -234,22 +239,23 @@ define fastcc void @fun4(%Ty4 %A) {
; CHECK-NEXT: aghi %r15, -192
; CHECK-NEXT: .cfi_def_cfa_offset 352
; CHECK-NEXT: lg %r0, 8(%r3)
-; CHECK-NEXT: lg %r1, 0(%r3)
-; CHECK-NEXT: lg %r3, 8(%r2)
-; CHECK-NEXT: lg %r4, 0(%r2)
-; CHECK-NEXT: stg %r0, 24
-; CHECK-NEXT: stg %r1, 16
-; CHECK-NEXT: stc %r3, 8
-; CHECK-NEXT: sllg %r2, %r4, 56
-; CHECK-NEXT: rosbg %r2, %r3, 8, 63, 56
-; CHECK-NEXT: stg %r2, 0
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: lg %r4, 8(%r2)
+; CHECK-NEXT: lg %r5, 0(%r2)
+; CHECK-NEXT: stg %r0, 24(%r1)
+; CHECK-NEXT: stg %r3, 16(%r1)
+; CHECK-NEXT: stc %r4, 8(%r1)
+; CHECK-NEXT: sllg %r2, %r5, 56
+; CHECK-NEXT: rosbg %r2, %r4, 8, 63, 56
+; CHECK-NEXT: stg %r2, 0(%r1)
; CHECK-NEXT: stg %r0, 168(%r15)
-; CHECK-NEXT: stg %r1, 160(%r15)
-; CHECK-NEXT: stg %r3, 184(%r15)
+; CHECK-NEXT: stg %r3, 160(%r15)
+; CHECK-NEXT: stg %r4, 184(%r15)
; CHECK-NEXT: la %r2, 176(%r15)
; CHECK-NEXT: la %r3, 160(%r15)
-; CHECK-NEXT: stg %r4, 176(%r15)
-; CHECK-NEXT: brasl %r14, fnptr@PLT
+; CHECK-NEXT: stg %r5, 176(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
; CHECK-NEXT: br %r14
;
@@ -262,62 +268,303 @@ define fastcc void @fun4(%Ty4 %A) {
; VECTOR-NEXT: .cfi_def_cfa_offset 352
; VECTOR-NEXT: vl %v1, 0(%r2), 3
; VECTOR-NEXT: vl %v0, 0(%r3), 3
-; VECTOR-NEXT: vsteb %v1, 8, 15
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
; VECTOR-NEXT: vrepib %v2, 8
; VECTOR-NEXT: vsrlb %v2, %v1, %v2
+; VECTOR-NEXT: vsteb %v1, 8(%r1), 15
; VECTOR-NEXT: la %r2, 176(%r15)
; VECTOR-NEXT: la %r3, 160(%r15)
-; VECTOR-NEXT: vst %v0, 16, 3
-; VECTOR-NEXT: vsteg %v2, 0, 1
+; VECTOR-NEXT: vst %v0, 16(%r1), 3
+; VECTOR-NEXT: vsteg %v2, 0(%r1), 1
; VECTOR-NEXT: vst %v0, 160(%r15), 3
; VECTOR-NEXT: vst %v1, 176(%r15), 3
-; VECTOR-NEXT: brasl %r14, fnptr@PLT
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
; VECTOR-NEXT: br %r14
- store %Ty4 %A, ptr null
- call void @fnptr(%Ty4 %A)
+ store %Ty4 %A, ptr @Dst
+ call void @Fnptr(%Ty4 %A)
ret void
}
%Ty5 = type {i128, i128}
declare fastcc %Ty5 @foo5()
-define fastcc void @fun5() {
+define fastcc %Ty5 @fun5() {
; CHECK-LABEL: fun5:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -192
; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgr %r13, %r2
; CHECK-NEXT: la %r2, 160(%r15)
; CHECK-NEXT: brasl %r14, foo5@PLT
-; CHECK-NEXT: lg %r0, 176(%r15)
-; CHECK-NEXT: lg %r1, 184(%r15)
+; CHECK-NEXT: lg %r0, 168(%r15)
+; CHECK-NEXT: lgrl %r1, Src@GOT
; CHECK-NEXT: lg %r2, 160(%r15)
-; CHECK-NEXT: lg %r3, 168(%r15)
-; CHECK-NEXT: stg %r0, 16
-; CHECK-NEXT: stg %r1, 24
-; CHECK-NEXT: stg %r2, 0
-; CHECK-NEXT: stg %r3, 8
-; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: alg %r0, 8(%r1)
+; CHECK-NEXT: lg %r3, 176(%r15)
+; CHECK-NEXT: lg %r4, 184(%r15)
+; CHECK-NEXT: alcg %r2, 0(%r1)
+; CHECK-NEXT: stg %r3, 16(%r13)
+; CHECK-NEXT: stg %r4, 24(%r13)
+; CHECK-NEXT: stg %r0, 8(%r13)
+; CHECK-NEXT: stg %r2, 0(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: fun5:
; VECTOR: # %bb.0:
-; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -192
; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgr %r13, %r2
; VECTOR-NEXT: la %r2, 160(%r15)
; VECTOR-NEXT: brasl %r14, foo5@PLT
-; VECTOR-NEXT: vl %v0, 160(%r15), 3
-; VECTOR-NEXT: vl %v1, 176(%r15), 3
-; VECTOR-NEXT: vst %v1, 16, 3
-; VECTOR-NEXT: vst %v0, 0, 3
-; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v1, 160(%r15), 3
+; VECTOR-NEXT: vl %v2, 0(%r1), 3
+; VECTOR-NEXT: vl %v0, 176(%r15), 3
+; VECTOR-NEXT: vaq %v1, %v1, %v2
+; VECTOR-NEXT: vst %v0, 16(%r13), 3
+; VECTOR-NEXT: vst %v1, 0(%r13), 3
+; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
; VECTOR-NEXT: br %r14
- %A = call %Ty5 @foo5()
- store %Ty5 %A, ptr null
- ret void
+ %V = call %Ty5 @foo5()
+ %Val0 = extractvalue %Ty5 %V, 0
+ %Ld = load i128, ptr @Src
+ %Add = add i128 %Val0, %Ld
+ %Res = insertvalue %Ty5 %V, i128 %Add, 0
+ ret %Ty5 %Res
+}
+
+%Ty6 = type {float, i128, i16}
+declare fastcc %Ty6 @foo6()
+define fastcc %Ty6 @fun6() {
+; CHECK-LABEL: fun6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, foo6@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r2, 168(%r15)
+; CHECK-NEXT: alg %r0, 8(%r1)
+; CHECK-NEXT: le %f0, 160(%r15)
+; CHECK-NEXT: lh %r3, 184(%r15)
+; CHECK-NEXT: alcg %r2, 0(%r1)
+; CHECK-NEXT: ste %f0, 0(%r13)
+; CHECK-NEXT: sth %r3, 24(%r13)
+; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: stg %r2, 8(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun6:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, foo6@PLT
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v1, 168(%r15), 3
+; VECTOR-NEXT: vl %v2, 0(%r1), 3
+; VECTOR-NEXT: lh %r0, 184(%r15)
+; VECTOR-NEXT: lde %f0, 160(%r15)
+; VECTOR-NEXT: vaq %v1, %v1, %v2
+; VECTOR-NEXT: sth %r0, 24(%r13)
+; VECTOR-NEXT: vst %v1, 8(%r13), 3
+; VECTOR-NEXT: ste %f0, 0(%r13)
+; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT: br %r14
+ %V = call %Ty6 @foo6()
+ %Val1 = extractvalue %Ty6 %V, 1
+ %Ld = load i128, ptr @Src
+ %Add = add i128 %Val1, %Ld
+ %Res = insertvalue %Ty6 %V, i128 %Add, 1
+ ret %Ty6 %Res
+}
+
+%Ty7 = type [4 x i128]
+declare fastcc %Ty7 @foo7()
+define fastcc %Ty7 @fun7() {
+; CHECK-LABEL: fun7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: .cfi_offset %r12, -64
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -224
+; CHECK-NEXT: .cfi_def_cfa_offset 384
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, foo7@PLT
+; CHECK-NEXT: lg %r0, 200(%r15)
+; CHECK-NEXT: lg %r1, 192(%r15)
+; CHECK-NEXT: lg %r2, 176(%r15)
+; CHECK-NEXT: lg %r3, 184(%r15)
+; CHECK-NEXT: lg %r4, 168(%r15)
+; CHECK-NEXT: lg %r5, 160(%r15)
+; CHECK-NEXT: lg %r14, 208(%r15)
+; CHECK-NEXT: lg %r12, 216(%r15)
+; CHECK-NEXT: algr %r4, %r3
+; CHECK-NEXT: alcgr %r5, %r2
+; CHECK-NEXT: stg %r14, 48(%r13)
+; CHECK-NEXT: stg %r12, 56(%r13)
+; CHECK-NEXT: stg %r1, 32(%r13)
+; CHECK-NEXT: stg %r0, 40(%r13)
+; CHECK-NEXT: stg %r2, 16(%r13)
+; CHECK-NEXT: stg %r3, 24(%r13)
+; CHECK-NEXT: stg %r4, 8(%r13)
+; CHECK-NEXT: stg %r5, 0(%r13)
+; CHECK-NEXT: lmg %r12, %r15, 320(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun7:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -224
+; VECTOR-NEXT: .cfi_def_cfa_offset 384
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, foo7@PLT
+; VECTOR-NEXT: vl %v2, 176(%r15), 3
+; VECTOR-NEXT: vl %v3, 160(%r15), 3
+; VECTOR-NEXT: vl %v0, 192(%r15), 3
+; VECTOR-NEXT: vl %v1, 208(%r15), 3
+; VECTOR-NEXT: vaq %v3, %v3, %v2
+; VECTOR-NEXT: vst %v1, 48(%r13), 3
+; VECTOR-NEXT: vst %v0, 32(%r13), 3
+; VECTOR-NEXT: vst %v2, 16(%r13), 3
+; VECTOR-NEXT: vst %v3, 0(%r13), 3
+; VECTOR-NEXT: lmg %r13, %r15, 328(%r15)
+; VECTOR-NEXT: br %r14
+ %V = call %Ty7 @foo7()
+ %Val0 = extractvalue %Ty7 %V, 0
+ %Val1 = extractvalue %Ty7 %V, 1
+ %Add = add i128 %Val0, %Val1
+ %Res = insertvalue %Ty7 %V, i128 %Add, 0
+ ret %Ty7 %Res
+}
+
+%Ty8 = type {float, [2 x i128], i32}
+declare fastcc %Ty8 @foo8()
+define fastcc %Ty8 @fun8() {
+; CHECK-LABEL: fun8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -208
+; CHECK-NEXT: .cfi_def_cfa_offset 368
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, foo8@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: lg %r1, 168(%r15)
+; CHECK-NEXT: le %f0, 160(%r15)
+; CHECK-NEXT: lhi %r2, 1
+; CHECK-NEXT: a %r2, 200(%r15)
+; CHECK-NEXT: lg %r3, 184(%r15)
+; CHECK-NEXT: lg %r4, 192(%r15)
+; CHECK-NEXT: ste %f0, 0(%r13)
+; CHECK-NEXT: st %r2, 40(%r13)
+; CHECK-NEXT: stg %r3, 24(%r13)
+; CHECK-NEXT: stg %r4, 32(%r13)
+; CHECK-NEXT: stg %r1, 8(%r13)
+; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 312(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun8:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -208
+; VECTOR-NEXT: .cfi_def_cfa_offset 368
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, foo8@PLT
+; VECTOR-NEXT: lhi %r0, 1
+; VECTOR-NEXT: a %r0, 200(%r15)
+; VECTOR-NEXT: lde %f0, 160(%r15)
+; VECTOR-NEXT: vl %v1, 168(%r15), 3
+; VECTOR-NEXT: vl %v2, 184(%r15), 3
+; VECTOR-NEXT: st %r0, 40(%r13)
+; VECTOR-NEXT: vst %v2, 24(%r13), 3
+; VECTOR-NEXT: vst %v1, 8(%r13), 3
+; VECTOR-NEXT: ste %f0, 0(%r13)
+; VECTOR-NEXT: lmg %r13, %r15, 312(%r15)
+; VECTOR-NEXT: br %r14
+ %V = call %Ty8 @foo8()
+ %Val2 = extractvalue %Ty8 %V, 2
+ %Add = add i32 %Val2, 1
+ %Res = insertvalue %Ty8 %V, i32 %Add, 2
+ ret %Ty8 %Res
+}
+
+%Ty9 = type {i72}
+declare fastcc %Ty9 @foo9(%Ty9)
+define fastcc %Ty9 @fun9(%Ty9 %A) {
+; CHECK-LABEL: fun9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lg %r0, 8(%r2)
+; CHECK-NEXT: lg %r1, 0(%r2)
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: stg %r1, 160(%r15)
+; CHECK-NEXT: brasl %r14, foo9@PLT
+; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: fun9:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: vl %v0, 0(%r3), 3
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: la %r3, 176(%r15)
+; VECTOR-NEXT: vst %v0, 176(%r15), 3
+; VECTOR-NEXT: brasl %r14, foo9@PLT
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 168(%r15), 15
+; VECTOR-NEXT: vlrepg %v1, 160(%r15)
+; VECTOR-NEXT: vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT: vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT: br %r14
+ %Res = call %Ty9 @foo9(%Ty9 %A)
+ ret %Ty9 %Res
}
>From 25131e075160e0cc0ecde40442c07431da003c37 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson at linux.vnet.ibm.com>
Date: Thu, 4 Dec 2025 02:33:30 +0100
Subject: [PATCH 5/5] Fix CanLowerReturn() to return false for any scalar
integer >64 bits. Tests reworked.
---
.../Target/SystemZ/SystemZISelLowering.cpp | 2 +-
llvm/test/CodeGen/SystemZ/args-22.ll | 1122 ++++++++++++-----
2 files changed, 779 insertions(+), 345 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index ab22c0efa8454..2511d08a6d0ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2562,7 +2562,7 @@ bool SystemZTargetLowering::CanLowerReturn(
// Special case that we cannot easily detect in RetCC_SystemZ since
// i128 may not be a legal type.
for (auto &Out : Outs)
- if (Out.ArgVT == MVT::i128)
+ if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
return false;
SmallVector<CCValAssign, 16> RetLocs;
diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll
index da0f3e08c7a62..ba422b65fc299 100644
--- a/llvm/test/CodeGen/SystemZ/args-22.ll
+++ b/llvm/test/CodeGen/SystemZ/args-22.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
;
-; Test passing IR struct arguments, which do not adhere to the ABI but are
+; Test passing IR struct arguments, which do not adhere to the SystemZ ABI but are
; split up with each element passed like a separate argument.
@Fnptr = external global ptr
@@ -10,16 +10,37 @@
@Dst = external global ptr
%Ty0 = type {i128}
-define fastcc void @fun0(%Ty0 %A) {
-; CHECK-LABEL: fun0:
+define void @arg0(%Ty0 %A) {
+; CHECK-LABEL: arg0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r2)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stg %r0, 8(%r1)
+; CHECK-NEXT: stg %r2, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg0:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vst %v0, 0(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty0 %A, ptr @Dst
+ ret void
+}
+
+define void @call0() {
+; CHECK-LABEL: call0:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -176
; CHECK-NEXT: .cfi_def_cfa_offset 336
-; CHECK-NEXT: lg %r0, 8(%r2)
-; CHECK-NEXT: lg %r1, 0(%r2)
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 8(%r1)
+; CHECK-NEXT: lg %r1, 0(%r1)
; CHECK-NEXT: stg %r0, 168(%r15)
; CHECK-NEXT: la %r2, 160(%r15)
; CHECK-NEXT: stg %r1, 160(%r15)
@@ -27,269 +48,248 @@ define fastcc void @fun0(%Ty0 %A) {
; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun0:
+; VECTOR-LABEL: call0:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -176
; VECTOR-NEXT: .cfi_def_cfa_offset 336
-; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v0, 0(%r1), 3
; VECTOR-NEXT: la %r2, 160(%r15)
; VECTOR-NEXT: vst %v0, 160(%r15), 3
; VECTOR-NEXT: brasl %r14, Fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 288(%r15)
; VECTOR-NEXT: br %r14
- call void @Fnptr(%Ty0 %A)
+ %L = load %Ty0, ptr @Src
+ call void @Fnptr(%Ty0 %L)
ret void
}
-%Ty1 = type {i128, i128}
-define fastcc void @fun1(%Ty1 %A, %Ty1 %B) {
-; CHECK-LABEL: fun1:
+define %Ty0 @ret0() {
+; CHECK-LABEL: ret0:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
-; CHECK-NEXT: aghi %r15, -224
-; CHECK-NEXT: .cfi_def_cfa_offset 384
-; CHECK-NEXT: lg %r0, 0(%r2)
-; CHECK-NEXT: lg %r1, 8(%r2)
-; CHECK-NEXT: lg %r2, 0(%r3)
-; CHECK-NEXT: lg %r3, 8(%r3)
-; CHECK-NEXT: lg %r14, 8(%r5)
-; CHECK-NEXT: lg %r5, 0(%r5)
-; CHECK-NEXT: lg %r13, 8(%r4)
-; CHECK-NEXT: lg %r4, 0(%r4)
-; CHECK-NEXT: stg %r14, 168(%r15)
-; CHECK-NEXT: stg %r5, 160(%r15)
-; CHECK-NEXT: stg %r13, 184(%r15)
-; CHECK-NEXT: stg %r4, 176(%r15)
-; CHECK-NEXT: stg %r3, 200(%r15)
-; CHECK-NEXT: stg %r2, 192(%r15)
-; CHECK-NEXT: stg %r1, 216(%r15)
-; CHECK-NEXT: la %r2, 208(%r15)
-; CHECK-NEXT: la %r3, 192(%r15)
-; CHECK-NEXT: la %r4, 176(%r15)
-; CHECK-NEXT: la %r5, 160(%r15)
-; CHECK-NEXT: stg %r0, 208(%r15)
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
; CHECK-NEXT: brasl %r14, Fnptr@PLT
-; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT: lg %r0, 168(%r15)
+; CHECK-NEXT: lg %r1, 160(%r15)
+; CHECK-NEXT: stg %r0, 8(%r13)
+; CHECK-NEXT: stg %r1, 0(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 280(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun1:
+; VECTOR-LABEL: ret0:
; VECTOR: # %bb.0:
-; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
-; VECTOR-NEXT: aghi %r15, -224
-; VECTOR-NEXT: .cfi_def_cfa_offset 384
-; VECTOR-NEXT: vl %v0, 0(%r2), 3
-; VECTOR-NEXT: vl %v1, 0(%r3), 3
-; VECTOR-NEXT: vl %v2, 0(%r4), 3
-; VECTOR-NEXT: vl %v3, 0(%r5), 3
-; VECTOR-NEXT: la %r2, 208(%r15)
-; VECTOR-NEXT: la %r3, 192(%r15)
-; VECTOR-NEXT: la %r4, 176(%r15)
-; VECTOR-NEXT: la %r5, 160(%r15)
-; VECTOR-NEXT: vst %v3, 160(%r15), 3
-; VECTOR-NEXT: vst %v2, 176(%r15), 3
-; VECTOR-NEXT: vst %v1, 192(%r15), 3
-; VECTOR-NEXT: vst %v0, 208(%r15), 3
+; VECTOR-NEXT: aghi %r15, -176
+; VECTOR-NEXT: .cfi_def_cfa_offset 336
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
; VECTOR-NEXT: brasl %r14, Fnptr@PLT
-; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT: vl %v0, 160(%r15), 3
+; VECTOR-NEXT: vst %v0, 0(%r13), 3
+; VECTOR-NEXT: lmg %r13, %r15, 280(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty0 @Fnptr()
+ ret %Ty0 %C
+}
+
+%Ty1 = type {i72}
+define void @arg1(%Ty1 %A) {
+; CHECK-LABEL: arg1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r2)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stc %r0, 8(%r1)
+; CHECK-NEXT: sllg %r2, %r2, 56
+; CHECK-NEXT: rosbg %r2, %r0, 8, 63, 56
+; CHECK-NEXT: stg %r2, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg1:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vrepib %v1, 8
+; VECTOR-NEXT: vsteb %v0, 8(%r1), 15
+; VECTOR-NEXT: vsrlb %v0, %v0, %v1
+; VECTOR-NEXT: vsteg %v0, 0(%r1), 1
; VECTOR-NEXT: br %r14
- call void @Fnptr(%Ty1 %A, %Ty1 %B)
+ store %Ty1 %A, ptr @Dst
ret void
}
-%Ty2 = type {i256}
-define fastcc void @fun2(%Ty2 %A, %Ty2 %B) {
-; CHECK-LABEL: fun2:
+define void @call1() {
+; CHECK-LABEL: call1:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
-; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
-; CHECK-NEXT: aghi %r15, -224
-; CHECK-NEXT: .cfi_def_cfa_offset 384
-; CHECK-NEXT: lg %r0, 0(%r2)
-; CHECK-NEXT: lg %r1, 8(%r2)
-; CHECK-NEXT: lg %r4, 16(%r2)
-; CHECK-NEXT: lg %r2, 24(%r2)
-; CHECK-NEXT: lg %r5, 24(%r3)
-; CHECK-NEXT: lg %r14, 16(%r3)
-; CHECK-NEXT: lg %r13, 8(%r3)
-; CHECK-NEXT: lg %r3, 0(%r3)
-; CHECK-NEXT: stg %r5, 184(%r15)
-; CHECK-NEXT: stg %r14, 176(%r15)
-; CHECK-NEXT: stg %r13, 168(%r15)
-; CHECK-NEXT: stg %r3, 160(%r15)
-; CHECK-NEXT: stg %r2, 216(%r15)
-; CHECK-NEXT: stg %r4, 208(%r15)
-; CHECK-NEXT: stg %r1, 200(%r15)
-; CHECK-NEXT: la %r2, 192(%r15)
-; CHECK-NEXT: la %r3, 160(%r15)
-; CHECK-NEXT: stg %r0, 192(%r15)
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 0(%r1)
+; CHECK-NEXT: sllg %r2, %r0, 8
+; CHECK-NEXT: ic %r2, 8(%r1)
+; CHECK-NEXT: srlg %r0, %r0, 56
+; CHECK-NEXT: stg %r2, 168(%r15)
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: stg %r0, 160(%r15)
; CHECK-NEXT: brasl %r14, Fnptr@PLT
-; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun2:
+; VECTOR-LABEL: call1:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
-; VECTOR-NEXT: aghi %r15, -224
-; VECTOR-NEXT: .cfi_def_cfa_offset 384
-; VECTOR-NEXT: vl %v0, 0(%r2), 3
-; VECTOR-NEXT: vl %v1, 16(%r2), 3
-; VECTOR-NEXT: vl %v2, 0(%r3), 3
-; VECTOR-NEXT: vl %v3, 16(%r3), 3
-; VECTOR-NEXT: la %r2, 192(%r15)
-; VECTOR-NEXT: la %r3, 160(%r15)
-; VECTOR-NEXT: vst %v3, 176(%r15), 3
-; VECTOR-NEXT: vst %v2, 160(%r15), 3
-; VECTOR-NEXT: vst %v1, 208(%r15), 3
-; VECTOR-NEXT: vst %v0, 192(%r15), 3
+; VECTOR-NEXT: aghi %r15, -176
+; VECTOR-NEXT: .cfi_def_cfa_offset 336
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 8(%r1), 15
+; VECTOR-NEXT: vlrepg %v1, 0(%r1)
+; VECTOR-NEXT: vrepib %v2, 8
+; VECTOR-NEXT: vslb %v1, %v1, %v2
+; VECTOR-NEXT: vo %v0, %v0, %v1
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: vst %v0, 160(%r15), 3
; VECTOR-NEXT: brasl %r14, Fnptr@PLT
-; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT: lmg %r14, %r15, 288(%r15)
; VECTOR-NEXT: br %r14
- call void @Fnptr(%Ty2 %A, %Ty2 %B)
+ %L = load %Ty1, ptr @Src
+ call void @Fnptr(%Ty1 %L)
ret void
}
-%Ty3 = type {float, i256, i32, i128, i8}
-define fastcc void @fun3(%Ty3 %A) {
-; CHECK-LABEL: fun3:
+define %Ty1 @ret1() {
+; CHECK-LABEL: ret1:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
-; CHECK-NEXT: .cfi_offset %r12, -64
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
-; CHECK-NEXT: aghi %r15, -208
-; CHECK-NEXT: .cfi_def_cfa_offset 368
-; CHECK-NEXT: lg %r0, 0(%r2)
-; CHECK-NEXT: lg %r1, 8(%r2)
-; CHECK-NEXT: lg %r14, 16(%r2)
-; CHECK-NEXT: lgrl %r13, Dst@GOT
-; CHECK-NEXT: lg %r2, 24(%r2)
-; CHECK-NEXT: lg %r12, 0(%r4)
-; CHECK-NEXT: lg %r4, 8(%r4)
-; CHECK-NEXT: stc %r5, 64(%r13)
-; CHECK-NEXT: st %r3, 40(%r13)
-; CHECK-NEXT: ste %f0, 0(%r13)
-; CHECK-NEXT: stg %r4, 56(%r13)
-; CHECK-NEXT: stg %r12, 48(%r13)
-; CHECK-NEXT: stg %r2, 32(%r13)
-; CHECK-NEXT: stg %r14, 24(%r13)
-; CHECK-NEXT: stg %r1, 16(%r13)
-; CHECK-NEXT: stg %r0, 8(%r13)
-; CHECK-NEXT: stg %r4, 168(%r15)
-; CHECK-NEXT: stg %r12, 160(%r15)
-; CHECK-NEXT: stg %r2, 200(%r15)
-; CHECK-NEXT: stg %r14, 192(%r15)
-; CHECK-NEXT: stg %r1, 184(%r15)
-; CHECK-NEXT: la %r2, 176(%r15)
-; CHECK-NEXT: la %r4, 160(%r15)
-; CHECK-NEXT: stg %r0, 176(%r15)
+; CHECK-NEXT: aghi %r15, -176
+; CHECK-NEXT: .cfi_def_cfa_offset 336
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
; CHECK-NEXT: brasl %r14, Fnptr@PLT
-; CHECK-NEXT: lmg %r12, %r15, 304(%r15)
+; CHECK-NEXT: lg %r0, 160(%r15)
+; CHECK-NEXT: llgc %r1, 168(%r15)
+; CHECK-NEXT: stg %r0, 0(%r13)
+; CHECK-NEXT: stc %r1, 8(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 280(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun3:
+; VECTOR-LABEL: ret1:
; VECTOR: # %bb.0:
-; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
-; VECTOR-NEXT: aghi %r15, -208
-; VECTOR-NEXT: .cfi_def_cfa_offset 368
-; VECTOR-NEXT: vl %v1, 0(%r4), 3
-; VECTOR-NEXT: vl %v2, 0(%r2), 3
-; VECTOR-NEXT: vl %v3, 16(%r2), 3
-; VECTOR-NEXT: lgrl %r1, Dst@GOT
-; VECTOR-NEXT: la %r2, 176(%r15)
-; VECTOR-NEXT: la %r4, 160(%r15)
-; VECTOR-NEXT: stc %r5, 64(%r1)
-; VECTOR-NEXT: st %r3, 40(%r1)
-; VECTOR-NEXT: ste %f0, 0(%r1)
-; VECTOR-NEXT: vst %v3, 24(%r1), 3
-; VECTOR-NEXT: vst %v2, 8(%r1), 3
-; VECTOR-NEXT: vst %v1, 48(%r1), 3
-; VECTOR-NEXT: vst %v1, 160(%r15), 3
-; VECTOR-NEXT: vst %v3, 192(%r15), 3
-; VECTOR-NEXT: vst %v2, 176(%r15), 3
+; VECTOR-NEXT: aghi %r15, -176
+; VECTOR-NEXT: .cfi_def_cfa_offset 336
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
; VECTOR-NEXT: brasl %r14, Fnptr@PLT
-; VECTOR-NEXT: lmg %r14, %r15, 320(%r15)
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 168(%r15), 15
+; VECTOR-NEXT: vlrepg %v1, 160(%r15)
+; VECTOR-NEXT: vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT: vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT: lmg %r13, %r15, 280(%r15)
; VECTOR-NEXT: br %r14
- store %Ty3 %A, ptr @Dst
- call void @Fnptr(%Ty3 %A)
- ret void
+ %C = call %Ty1 @Fnptr()
+ ret %Ty1 %C
}
-%Ty4 = type {i72, i128}
-define fastcc void @fun4(%Ty4 %A) {
-; CHECK-LABEL: fun4:
+%Ty2 = type {i128, i128}
+define void @arg2(%Ty2 %A) {
+; CHECK-LABEL: arg2:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
-; CHECK-NEXT: .cfi_offset %r14, -48
-; CHECK-NEXT: .cfi_offset %r15, -40
-; CHECK-NEXT: aghi %r15, -192
-; CHECK-NEXT: .cfi_def_cfa_offset 352
; CHECK-NEXT: lg %r0, 8(%r3)
; CHECK-NEXT: lgrl %r1, Dst@GOT
; CHECK-NEXT: lg %r3, 0(%r3)
; CHECK-NEXT: lg %r4, 8(%r2)
-; CHECK-NEXT: lg %r5, 0(%r2)
+; CHECK-NEXT: lg %r2, 0(%r2)
; CHECK-NEXT: stg %r0, 24(%r1)
; CHECK-NEXT: stg %r3, 16(%r1)
-; CHECK-NEXT: stc %r4, 8(%r1)
-; CHECK-NEXT: sllg %r2, %r5, 56
-; CHECK-NEXT: rosbg %r2, %r4, 8, 63, 56
+; CHECK-NEXT: stg %r4, 8(%r1)
; CHECK-NEXT: stg %r2, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg2:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: vl %v1, 0(%r3), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vst %v1, 16(%r1), 3
+; VECTOR-NEXT: vst %v0, 0(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty2 %A, ptr @Dst
+ ret void
+}
+
+define void @call2() {
+; CHECK-LABEL: call2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 24(%r1)
+; CHECK-NEXT: lg %r2, 16(%r1)
+; CHECK-NEXT: lg %r3, 8(%r1)
+; CHECK-NEXT: lg %r1, 0(%r1)
; CHECK-NEXT: stg %r0, 168(%r15)
-; CHECK-NEXT: stg %r3, 160(%r15)
-; CHECK-NEXT: stg %r4, 184(%r15)
+; CHECK-NEXT: stg %r2, 160(%r15)
+; CHECK-NEXT: stg %r3, 184(%r15)
; CHECK-NEXT: la %r2, 176(%r15)
; CHECK-NEXT: la %r3, 160(%r15)
-; CHECK-NEXT: stg %r5, 176(%r15)
+; CHECK-NEXT: stg %r1, 176(%r15)
; CHECK-NEXT: brasl %r14, Fnptr@PLT
; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun4:
+; VECTOR-LABEL: call2:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -192
; VECTOR-NEXT: .cfi_def_cfa_offset 352
-; VECTOR-NEXT: vl %v1, 0(%r2), 3
-; VECTOR-NEXT: vl %v0, 0(%r3), 3
-; VECTOR-NEXT: lgrl %r1, Dst@GOT
-; VECTOR-NEXT: vrepib %v2, 8
-; VECTOR-NEXT: vsrlb %v2, %v1, %v2
-; VECTOR-NEXT: vsteb %v1, 8(%r1), 15
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v0, 0(%r1), 3
+; VECTOR-NEXT: vl %v1, 16(%r1), 3
; VECTOR-NEXT: la %r2, 176(%r15)
; VECTOR-NEXT: la %r3, 160(%r15)
-; VECTOR-NEXT: vst %v0, 16(%r1), 3
-; VECTOR-NEXT: vsteg %v2, 0(%r1), 1
-; VECTOR-NEXT: vst %v0, 160(%r15), 3
-; VECTOR-NEXT: vst %v1, 176(%r15), 3
+; VECTOR-NEXT: vst %v1, 160(%r15), 3
+; VECTOR-NEXT: vst %v0, 176(%r15), 3
; VECTOR-NEXT: brasl %r14, Fnptr@PLT
; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
; VECTOR-NEXT: br %r14
- store %Ty4 %A, ptr @Dst
- call void @Fnptr(%Ty4 %A)
+ %L = load %Ty2, ptr @Src
+ call void @Fnptr(%Ty2 %L)
ret void
}
-%Ty5 = type {i128, i128}
-declare fastcc %Ty5 @foo5()
-define fastcc %Ty5 @fun5() {
-; CHECK-LABEL: fun5:
+define %Ty2 @ret2() {
+; CHECK-LABEL: ret2:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
; CHECK-NEXT: .cfi_offset %r13, -56
@@ -299,22 +299,19 @@ define fastcc %Ty5 @fun5() {
; CHECK-NEXT: .cfi_def_cfa_offset 352
; CHECK-NEXT: lgr %r13, %r2
; CHECK-NEXT: la %r2, 160(%r15)
-; CHECK-NEXT: brasl %r14, foo5@PLT
-; CHECK-NEXT: lg %r0, 168(%r15)
-; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: lg %r1, 184(%r15)
; CHECK-NEXT: lg %r2, 160(%r15)
-; CHECK-NEXT: alg %r0, 8(%r1)
-; CHECK-NEXT: lg %r3, 176(%r15)
-; CHECK-NEXT: lg %r4, 184(%r15)
-; CHECK-NEXT: alcg %r2, 0(%r1)
-; CHECK-NEXT: stg %r3, 16(%r13)
-; CHECK-NEXT: stg %r4, 24(%r13)
-; CHECK-NEXT: stg %r0, 8(%r13)
+; CHECK-NEXT: lg %r3, 168(%r15)
+; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: stg %r1, 24(%r13)
; CHECK-NEXT: stg %r2, 0(%r13)
+; CHECK-NEXT: stg %r3, 8(%r13)
; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun5:
+; VECTOR-LABEL: ret2:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT: .cfi_offset %r13, -56
@@ -324,152 +321,244 @@ define fastcc %Ty5 @fun5() {
; VECTOR-NEXT: .cfi_def_cfa_offset 352
; VECTOR-NEXT: lgr %r13, %r2
; VECTOR-NEXT: la %r2, 160(%r15)
-; VECTOR-NEXT: brasl %r14, foo5@PLT
-; VECTOR-NEXT: lgrl %r1, Src@GOT
-; VECTOR-NEXT: vl %v1, 160(%r15), 3
-; VECTOR-NEXT: vl %v2, 0(%r1), 3
-; VECTOR-NEXT: vl %v0, 176(%r15), 3
-; VECTOR-NEXT: vaq %v1, %v1, %v2
-; VECTOR-NEXT: vst %v0, 16(%r13), 3
-; VECTOR-NEXT: vst %v1, 0(%r13), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vl %v0, 160(%r15), 3
+; VECTOR-NEXT: vl %v1, 176(%r15), 3
+; VECTOR-NEXT: vst %v1, 16(%r13), 3
+; VECTOR-NEXT: vst %v0, 0(%r13), 3
; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
; VECTOR-NEXT: br %r14
- %V = call %Ty5 @foo5()
- %Val0 = extractvalue %Ty5 %V, 0
- %Ld = load i128, ptr @Src
- %Add = add i128 %Val0, %Ld
- %Res = insertvalue %Ty5 %V, i128 %Add, 0
- ret %Ty5 %Res
+ %C = call %Ty2 @Fnptr()
+ ret %Ty2 %C
}
-%Ty6 = type {float, i128, i16}
-declare fastcc %Ty6 @foo6()
-define fastcc %Ty6 @fun6() {
-; CHECK-LABEL: fun6:
+%Ty3 = type {i72, i128}
+define void @arg3(%Ty3 %A) {
+; CHECK-LABEL: arg3:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
-; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: lg %r0, 8(%r3)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: lg %r4, 8(%r2)
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stg %r0, 24(%r1)
+; CHECK-NEXT: stg %r3, 16(%r1)
+; CHECK-NEXT: stc %r4, 8(%r1)
+; CHECK-NEXT: sllg %r0, %r2, 56
+; CHECK-NEXT: rosbg %r0, %r4, 8, 63, 56
+; CHECK-NEXT: stg %r0, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg3:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r3), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vl %v1, 0(%r2), 3
+; VECTOR-NEXT: vsteb %v1, 8(%r1), 15
+; VECTOR-NEXT: vst %v0, 16(%r1), 3
+; VECTOR-NEXT: vrepib %v0, 8
+; VECTOR-NEXT: vsrlb %v0, %v1, %v0
+; VECTOR-NEXT: vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT: br %r14
+ store %Ty3 %A, ptr @Dst
+ ret void
+}
+
+define void @call3() {
+; CHECK-LABEL: call3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -192
; CHECK-NEXT: .cfi_def_cfa_offset 352
-; CHECK-NEXT: lgr %r13, %r2
-; CHECK-NEXT: la %r2, 160(%r15)
-; CHECK-NEXT: brasl %r14, foo6@PLT
-; CHECK-NEXT: lg %r0, 176(%r15)
; CHECK-NEXT: lgrl %r1, Src@GOT
-; CHECK-NEXT: lg %r2, 168(%r15)
-; CHECK-NEXT: alg %r0, 8(%r1)
-; CHECK-NEXT: le %f0, 160(%r15)
-; CHECK-NEXT: lh %r3, 184(%r15)
-; CHECK-NEXT: alcg %r2, 0(%r1)
-; CHECK-NEXT: ste %f0, 0(%r13)
-; CHECK-NEXT: sth %r3, 24(%r13)
-; CHECK-NEXT: stg %r0, 16(%r13)
-; CHECK-NEXT: stg %r2, 8(%r13)
-; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
+; CHECK-NEXT: lg %r0, 0(%r1)
+; CHECK-NEXT: sllg %r2, %r0, 8
+; CHECK-NEXT: lg %r3, 24(%r1)
+; CHECK-NEXT: lg %r4, 16(%r1)
+; CHECK-NEXT: ic %r2, 8(%r1)
+; CHECK-NEXT: srlg %r0, %r0, 56
+; CHECK-NEXT: stg %r3, 168(%r15)
+; CHECK-NEXT: stg %r4, 160(%r15)
+; CHECK-NEXT: stg %r2, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r0, 176(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun6:
+; VECTOR-LABEL: call3:
; VECTOR: # %bb.0:
-; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
-; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -192
; VECTOR-NEXT: .cfi_def_cfa_offset 352
-; VECTOR-NEXT: lgr %r13, %r2
-; VECTOR-NEXT: la %r2, 160(%r15)
-; VECTOR-NEXT: brasl %r14, foo6@PLT
; VECTOR-NEXT: lgrl %r1, Src@GOT
-; VECTOR-NEXT: vl %v1, 168(%r15), 3
-; VECTOR-NEXT: vl %v2, 0(%r1), 3
-; VECTOR-NEXT: lh %r0, 184(%r15)
-; VECTOR-NEXT: lde %f0, 160(%r15)
-; VECTOR-NEXT: vaq %v1, %v1, %v2
-; VECTOR-NEXT: sth %r0, 24(%r13)
-; VECTOR-NEXT: vst %v1, 8(%r13), 3
-; VECTOR-NEXT: ste %f0, 0(%r13)
-; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 8(%r1), 15
+; VECTOR-NEXT: vlrepg %v1, 0(%r1)
+; VECTOR-NEXT: vrepib %v2, 8
+; VECTOR-NEXT: vslb %v1, %v1, %v2
+; VECTOR-NEXT: vo %v0, %v0, %v1
+; VECTOR-NEXT: vl %v1, 16(%r1), 3
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v1, 160(%r15), 3
+; VECTOR-NEXT: vst %v0, 176(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
; VECTOR-NEXT: br %r14
- %V = call %Ty6 @foo6()
- %Val1 = extractvalue %Ty6 %V, 1
- %Ld = load i128, ptr @Src
- %Add = add i128 %Val1, %Ld
- %Res = insertvalue %Ty6 %V, i128 %Add, 1
- ret %Ty6 %Res
+ %L = load %Ty3, ptr @Src
+ call void @Fnptr(%Ty3 %L)
+ ret void
}
-%Ty7 = type [4 x i128]
-declare fastcc %Ty7 @foo7()
-define fastcc %Ty7 @fun7() {
-; CHECK-LABEL: fun7:
+define %Ty3 @ret3() {
+; CHECK-LABEL: ret3:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
-; CHECK-NEXT: .cfi_offset %r12, -64
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
-; CHECK-NEXT: aghi %r15, -224
-; CHECK-NEXT: .cfi_def_cfa_offset 384
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
; CHECK-NEXT: lgr %r13, %r2
; CHECK-NEXT: la %r2, 160(%r15)
-; CHECK-NEXT: brasl %r14, foo7@PLT
-; CHECK-NEXT: lg %r0, 200(%r15)
-; CHECK-NEXT: lg %r1, 192(%r15)
-; CHECK-NEXT: lg %r2, 176(%r15)
-; CHECK-NEXT: lg %r3, 184(%r15)
-; CHECK-NEXT: lg %r4, 168(%r15)
-; CHECK-NEXT: lg %r5, 160(%r15)
-; CHECK-NEXT: lg %r14, 208(%r15)
-; CHECK-NEXT: lg %r12, 216(%r15)
-; CHECK-NEXT: algr %r4, %r3
-; CHECK-NEXT: alcgr %r5, %r2
-; CHECK-NEXT: stg %r14, 48(%r13)
-; CHECK-NEXT: stg %r12, 56(%r13)
-; CHECK-NEXT: stg %r1, 32(%r13)
-; CHECK-NEXT: stg %r0, 40(%r13)
-; CHECK-NEXT: stg %r2, 16(%r13)
-; CHECK-NEXT: stg %r3, 24(%r13)
-; CHECK-NEXT: stg %r4, 8(%r13)
-; CHECK-NEXT: stg %r5, 0(%r13)
-; CHECK-NEXT: lmg %r12, %r15, 320(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: lg %r1, 184(%r15)
+; CHECK-NEXT: lg %r2, 160(%r15)
+; CHECK-NEXT: llgc %r3, 168(%r15)
+; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: stg %r1, 24(%r13)
+; CHECK-NEXT: stg %r2, 0(%r13)
+; CHECK-NEXT: stc %r3, 8(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun7:
+; VECTOR-LABEL: ret3:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT: .cfi_offset %r13, -56
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
-; VECTOR-NEXT: aghi %r15, -224
-; VECTOR-NEXT: .cfi_def_cfa_offset 384
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
; VECTOR-NEXT: lgr %r13, %r2
; VECTOR-NEXT: la %r2, 160(%r15)
-; VECTOR-NEXT: brasl %r14, foo7@PLT
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vleb %v0, 168(%r15), 15
+; VECTOR-NEXT: vlrepg %v1, 160(%r15)
; VECTOR-NEXT: vl %v2, 176(%r15), 3
-; VECTOR-NEXT: vl %v3, 160(%r15), 3
-; VECTOR-NEXT: vl %v0, 192(%r15), 3
-; VECTOR-NEXT: vl %v1, 208(%r15), 3
-; VECTOR-NEXT: vaq %v3, %v3, %v2
-; VECTOR-NEXT: vst %v1, 48(%r13), 3
-; VECTOR-NEXT: vst %v0, 32(%r13), 3
; VECTOR-NEXT: vst %v2, 16(%r13), 3
-; VECTOR-NEXT: vst %v3, 0(%r13), 3
-; VECTOR-NEXT: lmg %r13, %r15, 328(%r15)
+; VECTOR-NEXT: vsteg %v1, 0(%r13), 1
+; VECTOR-NEXT: vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty3 @Fnptr()
+ ret %Ty3 %C
+}
+
+%Ty4 = type {float, i8, i16, i32, i64, i128, i8}
+define void @arg4(%Ty4 %A) {
+; CHECK-LABEL: arg4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: l %r0, 164(%r15)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r14, 0(%r6)
+; CHECK-NEXT: lg %r13, 8(%r6)
+; CHECK-NEXT: stc %r0, 40(%r1)
+; CHECK-NEXT: stg %r5, 16(%r1)
+; CHECK-NEXT: st %r4, 8(%r1)
+; CHECK-NEXT: sth %r3, 6(%r1)
+; CHECK-NEXT: stc %r2, 4(%r1)
+; CHECK-NEXT: ste %f0, 0(%r1)
+; CHECK-NEXT: stg %r13, 32(%r1)
+; CHECK-NEXT: stg %r14, 24(%r1)
+; CHECK-NEXT: lmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg4:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v1, 0(%r6), 3
+; VECTOR-NEXT: l %r0, 164(%r15)
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: stc %r0, 40(%r1)
+; VECTOR-NEXT: stg %r5, 16(%r1)
+; VECTOR-NEXT: st %r4, 8(%r1)
+; VECTOR-NEXT: sth %r3, 6(%r1)
+; VECTOR-NEXT: stc %r2, 4(%r1)
+; VECTOR-NEXT: ste %f0, 0(%r1)
+; VECTOR-NEXT: vst %v1, 24(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty4 %A, ptr @Dst
+ ret void
+}
+
+define void @call4() {
+; CHECK-LABEL: call4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r6, %r15, 48(%r15)
+; CHECK-NEXT: .cfi_offset %r6, -112
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -184
+; CHECK-NEXT: .cfi_def_cfa_offset 344
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r5, 16(%r1)
+; CHECK-NEXT: l %r4, 8(%r1)
+; CHECK-NEXT: le %f0, 0(%r1)
+; CHECK-NEXT: lg %r0, 24(%r1)
+; CHECK-NEXT: lb %r14, 40(%r1)
+; CHECK-NEXT: lg %r13, 32(%r1)
+; CHECK-NEXT: lh %r3, 6(%r1)
+; CHECK-NEXT: lb %r2, 4(%r1)
+; CHECK-NEXT: st %r14, 164(%r15)
+; CHECK-NEXT: stg %r13, 176(%r15)
+; CHECK-NEXT: la %r6, 168(%r15)
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r6, %r15, 232(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call4:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r6, %r15, 48(%r15)
+; VECTOR-NEXT: .cfi_offset %r6, -112
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -184
+; VECTOR-NEXT: .cfi_def_cfa_offset 344
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: lh %r3, 6(%r1)
+; VECTOR-NEXT: lb %r2, 4(%r1)
+; VECTOR-NEXT: lb %r0, 40(%r1)
+; VECTOR-NEXT: lg %r5, 16(%r1)
+; VECTOR-NEXT: l %r4, 8(%r1)
+; VECTOR-NEXT: lde %f0, 0(%r1)
+; VECTOR-NEXT: vl %v1, 24(%r1), 3
+; VECTOR-NEXT: la %r6, 168(%r15)
+; VECTOR-NEXT: st %r0, 164(%r15)
+; VECTOR-NEXT: vst %v1, 168(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r6, %r15, 232(%r15)
; VECTOR-NEXT: br %r14
- %V = call %Ty7 @foo7()
- %Val0 = extractvalue %Ty7 %V, 0
- %Val1 = extractvalue %Ty7 %V, 1
- %Add = add i128 %Val0, %Val1
- %Res = insertvalue %Ty7 %V, i128 %Add, 0
- ret %Ty7 %Res
+ %L = load %Ty4, ptr @Src
+ call void @Fnptr(%Ty4 %L)
+ ret void
}
-%Ty8 = type {float, [2 x i128], i32}
-declare fastcc %Ty8 @foo8()
-define fastcc %Ty8 @fun8() {
-; CHECK-LABEL: fun8:
+define %Ty4 @ret4() {
+; CHECK-LABEL: ret4:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
; CHECK-NEXT: .cfi_offset %r13, -56
@@ -479,24 +568,27 @@ define fastcc %Ty8 @fun8() {
; CHECK-NEXT: .cfi_def_cfa_offset 368
; CHECK-NEXT: lgr %r13, %r2
; CHECK-NEXT: la %r2, 160(%r15)
-; CHECK-NEXT: brasl %r14, foo8@PLT
-; CHECK-NEXT: lg %r0, 176(%r15)
-; CHECK-NEXT: lg %r1, 168(%r15)
-; CHECK-NEXT: le %f0, 160(%r15)
-; CHECK-NEXT: lhi %r2, 1
-; CHECK-NEXT: a %r2, 200(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lb %r0, 164(%r15)
+; CHECK-NEXT: lh %r1, 166(%r15)
+; CHECK-NEXT: lg %r2, 192(%r15)
; CHECK-NEXT: lg %r3, 184(%r15)
-; CHECK-NEXT: lg %r4, 192(%r15)
+; CHECK-NEXT: le %f0, 160(%r15)
+; CHECK-NEXT: l %r4, 168(%r15)
+; CHECK-NEXT: lg %r5, 176(%r15)
+; CHECK-NEXT: lb %r14, 200(%r15)
; CHECK-NEXT: ste %f0, 0(%r13)
-; CHECK-NEXT: st %r2, 40(%r13)
+; CHECK-NEXT: st %r4, 8(%r13)
+; CHECK-NEXT: stg %r5, 16(%r13)
+; CHECK-NEXT: stc %r14, 40(%r13)
; CHECK-NEXT: stg %r3, 24(%r13)
-; CHECK-NEXT: stg %r4, 32(%r13)
-; CHECK-NEXT: stg %r1, 8(%r13)
-; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: stg %r2, 32(%r13)
+; CHECK-NEXT: sth %r1, 6(%r13)
+; CHECK-NEXT: stc %r0, 4(%r13)
; CHECK-NEXT: lmg %r13, %r15, 312(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun8:
+; VECTOR-LABEL: ret4:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT: .cfi_offset %r13, -56
@@ -506,45 +598,309 @@ define fastcc %Ty8 @fun8() {
; VECTOR-NEXT: .cfi_def_cfa_offset 368
; VECTOR-NEXT: lgr %r13, %r2
; VECTOR-NEXT: la %r2, 160(%r15)
-; VECTOR-NEXT: brasl %r14, foo8@PLT
-; VECTOR-NEXT: lhi %r0, 1
-; VECTOR-NEXT: a %r0, 200(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lb %r0, 164(%r15)
+; VECTOR-NEXT: lh %r1, 166(%r15)
+; VECTOR-NEXT: lb %r4, 200(%r15)
; VECTOR-NEXT: lde %f0, 160(%r15)
-; VECTOR-NEXT: vl %v1, 168(%r15), 3
-; VECTOR-NEXT: vl %v2, 184(%r15), 3
-; VECTOR-NEXT: st %r0, 40(%r13)
-; VECTOR-NEXT: vst %v2, 24(%r13), 3
-; VECTOR-NEXT: vst %v1, 8(%r13), 3
+; VECTOR-NEXT: l %r2, 168(%r15)
+; VECTOR-NEXT: lg %r3, 176(%r15)
+; VECTOR-NEXT: vl %v1, 184(%r15), 3
+; VECTOR-NEXT: stc %r4, 40(%r13)
+; VECTOR-NEXT: vst %v1, 24(%r13), 3
+; VECTOR-NEXT: stg %r3, 16(%r13)
+; VECTOR-NEXT: st %r2, 8(%r13)
+; VECTOR-NEXT: sth %r1, 6(%r13)
+; VECTOR-NEXT: stc %r0, 4(%r13)
; VECTOR-NEXT: ste %f0, 0(%r13)
; VECTOR-NEXT: lmg %r13, %r15, 312(%r15)
; VECTOR-NEXT: br %r14
- %V = call %Ty8 @foo8()
- %Val2 = extractvalue %Ty8 %V, 2
- %Add = add i32 %Val2, 1
- %Res = insertvalue %Ty8 %V, i32 %Add, 2
- ret %Ty8 %Res
+ %C = call %Ty4 @Fnptr()
+ ret %Ty4 %C
}
-%Ty9 = type {i72}
-declare fastcc %Ty9 @foo9(%Ty9)
-define fastcc %Ty9 @fun9(%Ty9 %A) {
-; CHECK-LABEL: fun9:
+%Ty5 = type [4 x i128]
+define void @arg5(%Ty5 %A) {
+; CHECK-LABEL: arg5:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: .cfi_offset %r12, -64
+; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
-; CHECK-NEXT: aghi %r15, -176
-; CHECK-NEXT: .cfi_def_cfa_offset 336
-; CHECK-NEXT: lg %r0, 8(%r2)
-; CHECK-NEXT: lg %r1, 0(%r2)
-; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: lg %r0, 0(%r2)
+; CHECK-NEXT: lg %r1, 8(%r2)
+; CHECK-NEXT: lg %r2, 0(%r3)
+; CHECK-NEXT: lg %r3, 8(%r3)
+; CHECK-NEXT: lg %r14, 8(%r5)
+; CHECK-NEXT: lgrl %r13, Dst@GOT
+; CHECK-NEXT: lg %r5, 0(%r5)
+; CHECK-NEXT: lg %r12, 8(%r4)
+; CHECK-NEXT: lg %r4, 0(%r4)
+; CHECK-NEXT: stg %r14, 56(%r13)
+; CHECK-NEXT: stg %r5, 48(%r13)
+; CHECK-NEXT: stg %r12, 40(%r13)
+; CHECK-NEXT: stg %r4, 32(%r13)
+; CHECK-NEXT: stg %r3, 24(%r13)
+; CHECK-NEXT: stg %r2, 16(%r13)
+; CHECK-NEXT: stg %r1, 8(%r13)
+; CHECK-NEXT: stg %r0, 0(%r13)
+; CHECK-NEXT: lmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg5:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: vl %v1, 0(%r3), 3
+; VECTOR-NEXT: vl %v2, 0(%r4), 3
+; VECTOR-NEXT: vl %v3, 0(%r5), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vst %v3, 48(%r1), 3
+; VECTOR-NEXT: vst %v2, 32(%r1), 3
+; VECTOR-NEXT: vst %v1, 16(%r1), 3
+; VECTOR-NEXT: vst %v0, 0(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty5 %A, ptr @Dst
+ ret void
+}
+
+define void @call5() {
+; CHECK-LABEL: call5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -224
+; CHECK-NEXT: .cfi_def_cfa_offset 384
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 0(%r1)
+; CHECK-NEXT: lg %r2, 8(%r1)
+; CHECK-NEXT: lg %r3, 16(%r1)
+; CHECK-NEXT: lg %r4, 24(%r1)
+; CHECK-NEXT: lg %r5, 56(%r1)
+; CHECK-NEXT: lg %r14, 48(%r1)
+; CHECK-NEXT: lg %r13, 40(%r1)
+; CHECK-NEXT: lg %r1, 32(%r1)
+; CHECK-NEXT: stg %r5, 168(%r15)
+; CHECK-NEXT: stg %r14, 160(%r15)
+; CHECK-NEXT: stg %r13, 184(%r15)
+; CHECK-NEXT: stg %r1, 176(%r15)
+; CHECK-NEXT: stg %r4, 200(%r15)
+; CHECK-NEXT: stg %r3, 192(%r15)
+; CHECK-NEXT: stg %r2, 216(%r15)
+; CHECK-NEXT: la %r2, 208(%r15)
+; CHECK-NEXT: la %r3, 192(%r15)
+; CHECK-NEXT: la %r4, 176(%r15)
+; CHECK-NEXT: la %r5, 160(%r15)
+; CHECK-NEXT: stg %r0, 208(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call5:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -224
+; VECTOR-NEXT: .cfi_def_cfa_offset 384
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v0, 0(%r1), 3
+; VECTOR-NEXT: vl %v1, 16(%r1), 3
+; VECTOR-NEXT: vl %v2, 32(%r1), 3
+; VECTOR-NEXT: vl %v3, 48(%r1), 3
+; VECTOR-NEXT: la %r2, 208(%r15)
+; VECTOR-NEXT: la %r3, 192(%r15)
+; VECTOR-NEXT: la %r4, 176(%r15)
+; VECTOR-NEXT: la %r5, 160(%r15)
+; VECTOR-NEXT: vst %v3, 160(%r15), 3
+; VECTOR-NEXT: vst %v2, 176(%r15), 3
+; VECTOR-NEXT: vst %v1, 192(%r15), 3
+; VECTOR-NEXT: vst %v0, 208(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 336(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty5, ptr @Src
+ call void @Fnptr(%Ty5 %L)
+ ret void
+}
+
+define %Ty5 @ret5() {
+; CHECK-LABEL: ret5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: .cfi_offset %r12, -64
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -224
+; CHECK-NEXT: .cfi_def_cfa_offset 384
+; CHECK-NEXT: lgr %r13, %r2
; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 168(%r15)
+; CHECK-NEXT: lg %r1, 160(%r15)
+; CHECK-NEXT: lg %r2, 184(%r15)
+; CHECK-NEXT: lg %r3, 176(%r15)
+; CHECK-NEXT: lg %r4, 208(%r15)
+; CHECK-NEXT: lg %r5, 216(%r15)
+; CHECK-NEXT: lg %r14, 192(%r15)
+; CHECK-NEXT: lg %r12, 200(%r15)
+; CHECK-NEXT: stg %r4, 48(%r13)
+; CHECK-NEXT: stg %r5, 56(%r13)
+; CHECK-NEXT: stg %r14, 32(%r13)
+; CHECK-NEXT: stg %r12, 40(%r13)
+; CHECK-NEXT: stg %r3, 16(%r13)
+; CHECK-NEXT: stg %r2, 24(%r13)
+; CHECK-NEXT: stg %r1, 0(%r13)
+; CHECK-NEXT: stg %r0, 8(%r13)
+; CHECK-NEXT: lmg %r12, %r15, 320(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: ret5:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
+; VECTOR-NEXT: .cfi_offset %r13, -56
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -224
+; VECTOR-NEXT: .cfi_def_cfa_offset 384
+; VECTOR-NEXT: lgr %r13, %r2
+; VECTOR-NEXT: la %r2, 160(%r15)
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: vl %v0, 160(%r15), 3
+; VECTOR-NEXT: vl %v1, 176(%r15), 3
+; VECTOR-NEXT: vl %v2, 192(%r15), 3
+; VECTOR-NEXT: vl %v3, 208(%r15), 3
+; VECTOR-NEXT: vst %v3, 48(%r13), 3
+; VECTOR-NEXT: vst %v2, 32(%r13), 3
+; VECTOR-NEXT: vst %v1, 16(%r13), 3
+; VECTOR-NEXT: vst %v0, 0(%r13), 3
+; VECTOR-NEXT: lmg %r13, %r15, 328(%r15)
+; VECTOR-NEXT: br %r14
+ %C = call %Ty5 @Fnptr()
+ ret %Ty5 %C
+}
+
+%Ty6 = type [2 x i72]
+define void @arg6(%Ty6 %A) {
+; CHECK-LABEL: arg6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r3)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r4, 8(%r2)
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: stc %r0, 24(%r1)
+; CHECK-NEXT: stc %r4, 8(%r1)
+; CHECK-NEXT: sllg %r3, %r3, 56
+; CHECK-NEXT: rosbg %r3, %r0, 8, 63, 56
+; CHECK-NEXT: stg %r3, 16(%r1)
+; CHECK-NEXT: sllg %r0, %r2, 56
+; CHECK-NEXT: rosbg %r0, %r4, 8, 63, 56
+; CHECK-NEXT: stg %r0, 0(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg6:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r2), 3
+; VECTOR-NEXT: vl %v1, 0(%r3), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vsteb %v1, 24(%r1), 15
+; VECTOR-NEXT: vrepib %v2, 8
+; VECTOR-NEXT: vsteb %v0, 8(%r1), 15
+; VECTOR-NEXT: vsrlb %v1, %v1, %v2
+; VECTOR-NEXT: vsrlb %v0, %v0, %v2
+; VECTOR-NEXT: vsteg %v1, 16(%r1), 1
+; VECTOR-NEXT: vsteg %v0, 0(%r1), 1
+; VECTOR-NEXT: br %r14
+ store %Ty6 %A, ptr @Dst
+ ret void
+}
+
+define void @call6() {
+; CHECK-LABEL: call6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 0(%r1)
+; CHECK-NEXT: lg %r2, 16(%r1)
+; CHECK-NEXT: sllg %r3, %r0, 8
+; CHECK-NEXT: sllg %r4, %r2, 8
+; CHECK-NEXT: ic %r4, 24(%r1)
+; CHECK-NEXT: ic %r3, 8(%r1)
+; CHECK-NEXT: srlg %r0, %r0, 56
+; CHECK-NEXT: srlg %r1, %r2, 56
+; CHECK-NEXT: stg %r4, 168(%r15)
; CHECK-NEXT: stg %r1, 160(%r15)
-; CHECK-NEXT: brasl %r14, foo9@PLT
-; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT: stg %r3, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r0, 176(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call6:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vgbm %v1, 0
+; VECTOR-NEXT: vleb %v1, 8(%r1), 15
+; VECTOR-NEXT: vlrepg %v2, 0(%r1)
+; VECTOR-NEXT: vrepib %v3, 8
+; VECTOR-NEXT: vslb %v2, %v2, %v3
+; VECTOR-NEXT: vgbm %v0, 0
+; VECTOR-NEXT: vo %v1, %v1, %v2
+; VECTOR-NEXT: vleb %v0, 24(%r1), 15
+; VECTOR-NEXT: vlrepg %v2, 16(%r1)
+; VECTOR-NEXT: vslb %v2, %v2, %v3
+; VECTOR-NEXT: vo %v0, %v0, %v2
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v0, 160(%r15), 3
+; VECTOR-NEXT: vst %v1, 176(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty6, ptr @Src
+ call void @Fnptr(%Ty6 %L)
+ ret void
+}
+
+define %Ty6 @ret6() {
+; CHECK-LABEL: ret6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lg %r0, 176(%r15)
+; CHECK-NEXT: llgc %r1, 184(%r15)
+; CHECK-NEXT: lg %r2, 160(%r15)
+; CHECK-NEXT: llgc %r3, 168(%r15)
+; CHECK-NEXT: stg %r0, 16(%r13)
+; CHECK-NEXT: stc %r1, 24(%r13)
+; CHECK-NEXT: stg %r2, 0(%r13)
+; CHECK-NEXT: stc %r3, 8(%r13)
+; CHECK-NEXT: lmg %r13, %r15, 296(%r15)
; CHECK-NEXT: br %r14
;
-; VECTOR-LABEL: fun9:
+; VECTOR-LABEL: ret6:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT: .cfi_offset %r13, -56
@@ -552,19 +908,97 @@ define fastcc %Ty9 @fun9(%Ty9 %A) {
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -192
; VECTOR-NEXT: .cfi_def_cfa_offset 352
-; VECTOR-NEXT: vl %v0, 0(%r3), 3
; VECTOR-NEXT: lgr %r13, %r2
; VECTOR-NEXT: la %r2, 160(%r15)
-; VECTOR-NEXT: la %r3, 176(%r15)
-; VECTOR-NEXT: vst %v0, 176(%r15), 3
-; VECTOR-NEXT: brasl %r14, foo9@PLT
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
; VECTOR-NEXT: vgbm %v0, 0
-; VECTOR-NEXT: vleb %v0, 168(%r15), 15
-; VECTOR-NEXT: vlrepg %v1, 160(%r15)
-; VECTOR-NEXT: vsteg %v1, 0(%r13), 1
-; VECTOR-NEXT: vsteb %v0, 8(%r13), 15
+; VECTOR-NEXT: vgbm %v1, 0
+; VECTOR-NEXT: vleb %v1, 168(%r15), 15
+; VECTOR-NEXT: vleb %v0, 184(%r15), 15
+; VECTOR-NEXT: vlrepg %v2, 160(%r15)
+; VECTOR-NEXT: vlrepg %v3, 176(%r15)
+; VECTOR-NEXT: vsteg %v3, 16(%r13), 1
+; VECTOR-NEXT: vsteb %v0, 24(%r13), 15
+; VECTOR-NEXT: vsteg %v2, 0(%r13), 1
+; VECTOR-NEXT: vsteb %v1, 8(%r13), 15
; VECTOR-NEXT: lmg %r13, %r15, 296(%r15)
; VECTOR-NEXT: br %r14
- %Res = call %Ty9 @foo9(%Ty9 %A)
- ret %Ty9 %Res
+ %C = call %Ty6 @Fnptr()
+ ret %Ty6 %C
+}
+
+%Ty7 = type {i128}
+define void @arg7(%Ty7 %A, %Ty7 %B) {
+; CHECK-LABEL: arg7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lg %r0, 8(%r2)
+; CHECK-NEXT: lgrl %r1, Dst@GOT
+; CHECK-NEXT: lg %r2, 0(%r2)
+; CHECK-NEXT: lg %r4, 8(%r3)
+; CHECK-NEXT: lg %r3, 0(%r3)
+; CHECK-NEXT: stg %r0, 8(%r1)
+; CHECK-NEXT: stg %r2, 0(%r1)
+; CHECK-NEXT: stg %r4, 24(%r1)
+; CHECK-NEXT: stg %r3, 16(%r1)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: arg7:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: vl %v0, 0(%r3), 3
+; VECTOR-NEXT: vl %v1, 0(%r2), 3
+; VECTOR-NEXT: lgrl %r1, Dst@GOT
+; VECTOR-NEXT: vst %v1, 0(%r1), 3
+; VECTOR-NEXT: vst %v0, 16(%r1), 3
+; VECTOR-NEXT: br %r14
+ store %Ty7 %A, ptr @Dst
+ %D2 = getelementptr %Ty7, ptr @Dst, i32 1
+ store %Ty7 %B, ptr %D2
+ ret void
+}
+
+define void @call7() {
+; CHECK-LABEL: call7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: lgrl %r1, Src@GOT
+; CHECK-NEXT: lg %r0, 24(%r1)
+; CHECK-NEXT: lg %r2, 16(%r1)
+; CHECK-NEXT: lg %r3, 8(%r1)
+; CHECK-NEXT: lg %r1, 0(%r1)
+; CHECK-NEXT: stg %r0, 168(%r15)
+; CHECK-NEXT: stg %r2, 160(%r15)
+; CHECK-NEXT: stg %r3, 184(%r15)
+; CHECK-NEXT: la %r2, 176(%r15)
+; CHECK-NEXT: la %r3, 160(%r15)
+; CHECK-NEXT: stg %r1, 176(%r15)
+; CHECK-NEXT: brasl %r14, Fnptr@PLT
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+;
+; VECTOR-LABEL: call7:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
+; VECTOR-NEXT: .cfi_offset %r14, -48
+; VECTOR-NEXT: .cfi_offset %r15, -40
+; VECTOR-NEXT: aghi %r15, -192
+; VECTOR-NEXT: .cfi_def_cfa_offset 352
+; VECTOR-NEXT: lgrl %r1, Src@GOT
+; VECTOR-NEXT: vl %v0, 0(%r1), 3
+; VECTOR-NEXT: vl %v1, 16(%r1), 3
+; VECTOR-NEXT: la %r2, 176(%r15)
+; VECTOR-NEXT: la %r3, 160(%r15)
+; VECTOR-NEXT: vst %v1, 160(%r15), 3
+; VECTOR-NEXT: vst %v0, 176(%r15), 3
+; VECTOR-NEXT: brasl %r14, Fnptr@PLT
+; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
+; VECTOR-NEXT: br %r14
+ %L = load %Ty7, ptr @Src
+ %S2 = getelementptr %Ty7, ptr @Src, i32 1
+ %L2 = load %Ty7, ptr %S2
+ call void @Fnptr(%Ty7 %L, %Ty7 %L2)
+ ret void
}
More information about the llvm-commits
mailing list