[llvm] c218664 - [PowerPC][AIX] Implement by-val caller arguments in a single register.
Sean Fertile via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 18 07:58:19 PDT 2020
Author: Chris Bowler
Date: 2020-03-18T10:57:28-04:00
New Revision: c21866476e144377102ed358455dc105b4977764
URL: https://github.com/llvm/llvm-project/commit/c21866476e144377102ed358455dc105b4977764
DIFF: https://github.com/llvm/llvm-project/commit/c21866476e144377102ed358455dc105b4977764.diff
LOG: [PowerPC][AIX] Implement by-val caller arguments in a single register.
This is the first of a series of patches that adds caller support for
by-value arguments. This patch adds support for arguments that are passed in a
single GPR.
Three cases remain unsupported and are reported as fatal errors:
- The by-value argument is larger than a single register.
- There are no remaining GPRs even though the by-value argument would
  otherwise fit in a single GPR.
- The by-value argument requires alignment greater than register width.
Future patches will be required to add support for these cases as well
as for the callee handling (in LowerFormalArguments_AIX) that
corresponds to this work.
Differential Revision: https://reviews.llvm.org/D75863
Added:
llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Removed:
llvm/test/CodeGen/PowerPC/aix-byval-param.ll
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 22e7f8a7b97e..2d718011059a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6841,9 +6841,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
if (ValVT == MVT::f128)
report_fatal_error("f128 is unimplemented on AIX.");
- if (ArgFlags.isByVal())
- report_fatal_error("Passing structure by value is unimplemented.");
-
if (ArgFlags.isNest())
report_fatal_error("Nest arguments are unimplemented.");
@@ -6857,6 +6854,29 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+ if (ArgFlags.isByVal()) {
+ if (ArgFlags.getNonZeroByValAlign() > PtrByteSize)
+ report_fatal_error("Pass-by-value arguments with alignment greater than "
+ "register width are not supported.");
+
+ const unsigned ByValSize = ArgFlags.getByValSize();
+
+ // An empty aggregate parameter takes up no storage and no registers.
+ if (ByValSize == 0)
+ return false;
+
+ if (ByValSize <= PtrByteSize) {
+ State.AllocateStack(PtrByteSize, PtrByteSize);
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ return false;
+ }
+ }
+
+ report_fatal_error(
+ "Pass-by-value arguments are only supported in a single register.");
+ }
+
// Arguments always reserve parameter save area.
switch (ValVT.SimpleTy) {
default:
@@ -7130,9 +7150,59 @@ SDValue PPCTargetLowering::LowerCall_AIX(
CCValAssign &VA = ArgLocs[I++];
SDValue Arg = OutVals[VA.getValNo()];
+ ISD::ArgFlagsTy Flags = Outs[VA.getValNo()].Flags;
+ const MVT LocVT = VA.getLocVT();
+ const MVT ValVT = VA.getValVT();
+
+ if (Flags.isByVal()) {
+ const unsigned ByValSize = Flags.getByValSize();
+ assert(
+ VA.isRegLoc() && ByValSize > 0 && ByValSize <= PtrByteSize &&
+ "Pass-by-value arguments are only supported in a single register.");
+
+ // Loads must be a power-of-2 size and cannot be larger than the
+ // ByValSize. For example: a 7 byte by-val arg requires 4, 2 and 1 byte
+ // loads.
+ SDValue RegVal;
+ for (unsigned Bytes = 0; Bytes != ByValSize;) {
+ unsigned N = PowerOf2Floor(ByValSize - Bytes);
+ const MVT VT =
+ N == 1 ? MVT::i8
+ : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
+
+ SDValue LoadAddr = Arg;
+ if (Bytes != 0) {
+ // Adjust the load offset by the number of bytes read so far.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ LoadAddr = DAG.getNode(ISD::ADD, dl, LocVT, Arg,
+ DAG.getConstant(Bytes, dl, LocVT), Flags);
+ }
+ SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, LoadAddr,
+ MachinePointerInfo(), VT);
+ MemOpChains.push_back(Load.getValue(1));
- if (!VA.isRegLoc() && !VA.isMemLoc())
- report_fatal_error("Unexpected location for function call argument.");
+ Bytes += N;
+ assert(LocVT.getSizeInBits() >= (Bytes * 8));
+ if (unsigned NumSHLBits = LocVT.getSizeInBits() - (Bytes * 8)) {
+ // By-val arguments are passed left-justfied in register.
+ EVT ShiftAmountTy =
+ getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
+ SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
+ SDValue ShiftedLoad =
+ DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
+ RegVal = RegVal ? DAG.getNode(ISD::OR, dl, LocVT, RegVal, ShiftedLoad)
+ : ShiftedLoad;
+ } else {
+ assert(!RegVal && Bytes == ByValSize &&
+ "Pass-by-value argument handling unexpectedly incomplete.");
+ RegVal = Load;
+ }
+ }
+
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), RegVal));
+ continue;
+ }
switch (VA.getLocInfo()) {
default:
@@ -7165,20 +7235,20 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// Custom handling is used for GPR initializations for vararg float
// arguments.
assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
- VA.getValVT().isFloatingPoint() && VA.getLocVT().isInteger() &&
+ ValVT.isFloatingPoint() && LocVT.isInteger() &&
"Unexpected register handling for calling convention.");
SDValue ArgAsInt =
- DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
+ DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
- if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
+ if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
// f32 in 32-bit GPR
// f64 in 64-bit GPR
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
- else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
+ else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
// f32 in 64-bit GPR.
RegsToPass.push_back(std::make_pair(
- VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
+ VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
else {
// f64 in two 32-bit GPRs
// The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
diff --git a/llvm/test/CodeGen/PowerPC/aix-byval-param.ll b/llvm/test/CodeGen/PowerPC/aix-byval-param.ll
deleted file mode 100644
index 2dfdf7f8535f..000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-byval-param.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
-
-%struct.S = type { i32, i32 }
-
-define void @bar() {
-entry:
- %s1 = alloca %struct.S, align 4
- %agg.tmp = alloca %struct.S, align 4
- call void @foo(%struct.S* byval(%struct.S) align 4 %agg.tmp)
- ret void
-}
-
-declare void @foo(%struct.S* byval(%struct.S) align 4)
-
-; CHECK: LLVM ERROR: Passing structure by value is unimplemented.
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
new file mode 100644
index 000000000000..745323ce8a5d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
@@ -0,0 +1,16 @@
+; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+
+%struct.S = type { [9 x i8] }
+
+define void @bar() {
+entry:
+ %s1 = alloca %struct.S, align 1
+ %agg.tmp = alloca %struct.S, align 1
+ call void @foo(%struct.S* byval(%struct.S) align 1 %agg.tmp)
+ ret void
+}
+
+declare void @foo(%struct.S* byval(%struct.S) align 1)
+
+; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register.
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
new file mode 100644
index 000000000000..b9c48272d147
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
@@ -0,0 +1,16 @@
+; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+
+%struct.S = type { [1 x i8] }
+
+define void @bar() {
+entry:
+ %s1 = alloca %struct.S, align 1
+ %agg.tmp = alloca %struct.S, align 1
+ call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, %struct.S* byval(%struct.S) align 1 %agg.tmp)
+ ret void
+}
+
+declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 1)
+
+; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register.
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
new file mode 100644
index 000000000000..357acfe595bb
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
@@ -0,0 +1,16 @@
+; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+
+%struct.S = type { [1 x i8] }
+
+define void @bar() {
+entry:
+ %s1 = alloca %struct.S, align 32
+ %agg.tmp = alloca %struct.S, align 32
+ call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, %struct.S* byval(%struct.S) align 32 %agg.tmp)
+ ret void
+}
+
+declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 32)
+
+; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
new file mode 100644
index 000000000000..0f90da66d60d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
@@ -0,0 +1,206 @@
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefixes=CHECK,32BIT %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefixes=CHECK,64BIT %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s
+
+%struct.S1 = type { [1 x i8] }
+@gS1 = external global %struct.S1, align 1
+
+define void @call_test_byval_1Byte() {
+entry:
+ call void @test_byval_1Byte(%struct.S1* byval(%struct.S1) align 1 @gS1)
+ ret void
+}
+
+declare void @test_byval_1Byte(%struct.S1* byval(%struct.S1) align 1)
+
+; CHECK-LABEL: name: call_test_byval_1Byte{{.*}}
+
+; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: renamable $r3 = LBZ 0, killed renamable $r[[REG]] :: (load 1)
+; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 24, 0, 7
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_1Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_1Byte:
+
+; ASM32PWR4: stwu 1, -64(1)
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32PWR4-NEXT: lbz 3, 0([[REG]])
+; ASM32PWR4-NEXT: slwi 3, 3, 24
+; ASM32PWR4-NEXT: bl .test_byval_1Byte
+; ASM32PWR4-NEXT: nop
+; ASM32PWR4-NEXT: addi 1, 1, 64
+
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $x3 = LBZ8 0, killed renamable $x[[REG]] :: (load 1)
+; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 56, 7
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_1Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4: std 0, 16(1)
+; ASM64PWR4-NEXT: stdu 1, -112(1)
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-NEXT: lbz 3, 0([[REG]])
+; ASM64PWR4-NEXT: sldi 3, 3, 56
+; ASM64PWR4-NEXT: bl .test_byval_1Byte
+; ASM64PWR4-NEXT: nop
+; ASM64PWR4-NEXT: addi 1, 1, 112
+
+%struct.S2 = type { [2 x i8] }
+
+@gS2 = external global %struct.S2, align 1
+
+define void @call_test_byval_2Byte() {
+entry:
+ call void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1 @gS2)
+ ret void
+}
+
+declare void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1)
+
+; CHECK-LABEL: name: call_test_byval_2Byte{{.*}}
+
+; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS2, $r2 :: (load 4 from got)
+; 32BIT-NEXT: renamable $r3 = LHZ 0, killed renamable $r[[REG]] :: (load 2)
+; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 16, 0, 15
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_2Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_2Byte:
+
+; ASM32PWR4: stwu 1, -64(1)
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32PWR4-NEXT: lhz 3, 0([[REG]])
+; ASM32PWR4-NEXT: slwi 3, 3, 16
+; ASM32PWR4-NEXT: bl .test_byval_2Byte
+; ASM32PWR4-NEXT: nop
+; ASM32PWR4-NEXT: addi 1, 1, 64
+
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS2, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $x3 = LHZ8 0, killed renamable $x[[REG]] :: (load 2)
+; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 48, 15
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_2Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4: std 0, 16(1)
+; ASM64PWR4-NEXT: stdu 1, -112(1)
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-NEXT: lhz 3, 0([[REG]])
+; ASM64PWR4-NEXT: sldi 3, 3, 48
+; ASM64PWR4-NEXT: bl .test_byval_2Byte
+; ASM64PWR4-NEXT: nop
+; ASM64PWR4-NEXT: addi 1, 1, 112
+
+%struct.S3 = type { [3 x i8] }
+
+@gS3 = external global %struct.S3, align 1
+
+define void @call_test_byval_3Byte() {
+entry:
+ call void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1 @gS3)
+ ret void
+}
+
+declare void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1)
+
+; CHECK-LABEL: name: call_test_byval_3Byte{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS3, $r2 :: (load 4 from got)
+; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 0, killed renamable $r[[REGADDR]] :: (load 2)
+; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LBZ 2, renamable $r[[REGADDR]] :: (load 1)
+; 32BIT-DAG: renamable $r3 = RLWINM killed renamable $r[[REG2]], 8, 16, 23
+; 32BIT-DAG: renamable $r3 = RLWIMI killed renamable $r3, killed renamable $r[[REG1]], 16, 0, 15
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_3Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_3Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32PWR4: stwu 1, -64(1)
+; ASM32PWR4-NEXT: lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32PWR4-DAG: lhz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM32PWR4-DAG: lbz [[REG2:[0-9]+]], 2([[REGADDR]])
+; ASM32PWR4-DAG: rlwinm 3, [[REG2]], 8, 16, 23
+; ASM32PWR4-DAG: rlwimi 3, [[REG1]], 16, 0, 15
+; ASM32PWR4-NEXT: bl .test_byval_3Byte
+; ASM32PWR4-NEXT: nop
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-DAG: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS3, $x2 :: (load 8 from got)
+; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LHZ8 0, killed renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 2, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG: renamable $x3 = RLDIC killed renamable $x[[REG2]], 40, 16
+; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 48, 0
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_3Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4: stdu 1, -112(1)
+; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-DAG: lhz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64PWR4-DAG: lbz [[REG2:[0-9]+]], 2([[REGADDR]])
+; ASM64PWR4-DAG: rldic 3, [[REG2]], 40, 16
+; ASM64PWR4-DAG: rldimi 3, [[REG1]], 48, 0
+; ASM64PWR4-NEXT: bl .test_byval_3Byte
+; ASM64PWR4-NEXT: nop
+
+%struct.S4 = type { [4 x i8] }
+
+@gS4 = external global %struct.S4, align 1
+
+define void @call_test_byval_4Byte() {
+entry:
+ call void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1 @gS4)
+ ret void
+}
+
+declare void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1)
+
+; CHECK-LABEL: name: call_test_byval_4Byte{{.*}}
+
+; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS4, $r2 :: (load 4 from got)
+; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r[[REG]] :: (load 4)
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_4Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_4Byte:
+
+; ASM32PWR4: stwu 1, -64(1)
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32PWR4-NEXT: lwz 3, 0([[REG]])
+; ASM32PWR4-NEXT: bl .test_byval_4Byte
+; ASM32PWR4-NEXT: nop
+; ASM32PWR4-NEXT: addi 1, 1, 64
+
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS4, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $x3 = LWZ8 0, killed renamable $x[[REG]] :: (load 4)
+; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 32, 31
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_4Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4: stdu 1, -112(1)
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-NEXT: lwz 3, 0([[REG]])
+; ASM64PWR4-NEXT: sldi 3, 3, 32
+; ASM64PWR4-NEXT: bl .test_byval_4Byte
+; ASM64PWR4-NEXT: nop
+; ASM64PWR4-NEXT: addi 1, 1, 112
diff --git a/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll
new file mode 100644
index 000000000000..599ab13530b8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll
@@ -0,0 +1,146 @@
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefixes=CHECK,64BIT %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s
+
+%struct.S5 = type { [5 x i8] }
+
+@gS5 = external global %struct.S5, align 1
+
+define void @call_test_byval_5Byte() {
+entry:
+ call void @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1 @gS5)
+ ret void
+}
+
+declare void @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1)
+
+; CHECK-LABEL: name: call_test_byval_5Byte{{.*}}
+
+; CHECKASM-LABEL: .call_test_byval_5Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS5, $x2 :: (load 8 from got)
+; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 4, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 24, 0, 7
+; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_5Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4: stdu 1, -112(1)
+; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64PWR4-DAG: lbz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64PWR4-DAG: rlwinm 3, [[REG2]], 24, 0, 7
+; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0
+; ASM64PWR4-NEXT: bl .test_byval_5Byte
+; ASM64PWR4-NEXT: nop
+
+%struct.S6 = type { [6 x i8] }
+
+@gS6 = external global %struct.S6, align 1
+
+define void @call_test_byval_6Byte() {
+entry:
+ call void @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1 @gS6)
+ ret void
+}
+
+declare void @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1)
+
+; CHECK-LABEL: name: call_test_byval_6Byte{{.*}}
+
+; CHECKASM-LABEL: .call_test_byval_6Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS6, $x2 :: (load 8 from got)
+; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 16, 0, 15
+; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_6Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4: stdu 1, -112(1)
+; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64PWR4-DAG: lhz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64PWR4-DAG: rlwinm 3, [[REG2]], 16, 0, 15
+; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0
+; ASM64PWR4-NEXT: bl .test_byval_6Byte
+; ASM64PWR4-NEXT: nop
+
+%struct.S7 = type { [7 x i8] }
+
+@gS7 = external global %struct.S7, align 1
+
+define void @call_test_byval_7Byte() {
+entry:
+ call void @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1 @gS7)
+ ret void
+}
+
+declare void @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1)
+
+; CHECK-LABEL: name: call_test_byval_7Byte{{.*}}
+
+; CHECKASM-LABEL: .call_test_byval_7Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS7, $x2 :: (load 8 from got)
+; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LBZ8 6, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23
+; 64BIT-DAG: renamable $x3 = RLWIMI8 killed renamable $x3, killed renamable $x[[REG2]], 16, 0, 15
+; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_7Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4: stdu 1, -112(1)
+; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64PWR4-DAG: lhz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64PWR4-DAG: lbz [[REG3:[0-9]+]], 6([[REGADDR]])
+; ASM64PWR4-DAG: rlwinm 3, [[REG3]], 8, 16, 23
+; ASM64PWR4-DAG: rlwimi 3, [[REG2]], 16, 0, 15
+; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0
+; ASM64PWR4-NEXT: bl .test_byval_7Byte
+; ASM64PWR4-NEXT: nop
+
+%struct.S8 = type { [8 x i8] }
+
+@gS8 = external global %struct.S8, align 1
+
+define void @call_test_byval_8Byte() {
+entry:
+ call void @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1 @gS8)
+ ret void
+}
+
+declare void @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1)
+
+; CHECK-LABEL: name: call_test_byval_8Byte{{.*}}
+
+; CHECKASM-LABEL: .call_test_byval_8Byte:
+
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS8, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_8Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4: stdu 1, -112(1)
+; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-NEXT: ld 3, 0([[REGADDR]])
+; ASM64PWR4-NEXT: bl .test_byval_8Byte
+; ASM64PWR4-NEXT: nop
More information about the llvm-commits
mailing list