[llvm] c218664 - [PowerPC][AIX] Implement by-val caller arguments in a single register.

Sean Fertile via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 18 07:58:19 PDT 2020


Author: Chris Bowler
Date: 2020-03-18T10:57:28-04:00
New Revision: c21866476e144377102ed358455dc105b4977764

URL: https://github.com/llvm/llvm-project/commit/c21866476e144377102ed358455dc105b4977764
DIFF: https://github.com/llvm/llvm-project/commit/c21866476e144377102ed358455dc105b4977764.diff

LOG: [PowerPC][AIX] Implement by-val caller arguments in a single register.

This is the first of a series of patches that adds caller support for
by-value arguments. This patch adds support for arguments that are passed in a
single GPR.

There are 3 limitation cases that are not yet supported and are reported as
fatal errors:
- The by-value argument is larger than a single register.
- There are no remaining GPRs even though the by-value argument would
  otherwise fit in a single GPR.
- The by-value argument requires alignment greater than register width.

Future patches will be required to add support for these cases as well
as for the callee handling (in LowerFormalArguments_AIX) that
corresponds to this work.

Differential Revision: https://reviews.llvm.org/D75863

Added: 
    llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
    llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
    llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
    llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
    llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 
    llvm/test/CodeGen/PowerPC/aix-byval-param.ll


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 22e7f8a7b97e..2d718011059a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6841,9 +6841,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
   if (ValVT == MVT::f128)
     report_fatal_error("f128 is unimplemented on AIX.");
 
-  if (ArgFlags.isByVal())
-    report_fatal_error("Passing structure by value is unimplemented.");
-
   if (ArgFlags.isNest())
     report_fatal_error("Nest arguments are unimplemented.");
 
@@ -6857,6 +6854,29 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
 
+  if (ArgFlags.isByVal()) {
+    if (ArgFlags.getNonZeroByValAlign() > PtrByteSize)
+      report_fatal_error("Pass-by-value arguments with alignment greater than "
+                         "register width are not supported.");
+
+    const unsigned ByValSize = ArgFlags.getByValSize();
+
+    // An empty aggregate parameter takes up no storage and no registers.
+    if (ByValSize == 0)
+      return false;
+
+    if (ByValSize <= PtrByteSize) {
+      State.AllocateStack(PtrByteSize, PtrByteSize);
+      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+        return false;
+      }
+    }
+
+    report_fatal_error(
+        "Pass-by-value arguments are only supported in a single register.");
+  }
+
   // Arguments always reserve parameter save area.
   switch (ValVT.SimpleTy) {
   default:
@@ -7130,9 +7150,59 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     CCValAssign &VA = ArgLocs[I++];
 
     SDValue Arg = OutVals[VA.getValNo()];
+    ISD::ArgFlagsTy Flags = Outs[VA.getValNo()].Flags;
+    const MVT LocVT = VA.getLocVT();
+    const MVT ValVT = VA.getValVT();
+
+    if (Flags.isByVal()) {
+      const unsigned ByValSize = Flags.getByValSize();
+      assert(
+          VA.isRegLoc() && ByValSize > 0 && ByValSize <= PtrByteSize &&
+          "Pass-by-value arguments are only supported in a single register.");
+
+      // Loads must be a power-of-2 size and cannot be larger than the
+      // ByValSize. For example: a 7 byte by-val arg requires 4, 2 and 1 byte
+      // loads.
+      SDValue RegVal;
+      for (unsigned Bytes = 0; Bytes != ByValSize;) {
+        unsigned N = PowerOf2Floor(ByValSize - Bytes);
+        const MVT VT =
+            N == 1 ? MVT::i8
+                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
+
+        SDValue LoadAddr = Arg;
+        if (Bytes != 0) {
+          // Adjust the load offset by the number of bytes read so far.
+          SDNodeFlags Flags;
+          Flags.setNoUnsignedWrap(true);
+          LoadAddr = DAG.getNode(ISD::ADD, dl, LocVT, Arg,
+                                 DAG.getConstant(Bytes, dl, LocVT), Flags);
+        }
+        SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, LoadAddr,
+                                      MachinePointerInfo(), VT);
+        MemOpChains.push_back(Load.getValue(1));
 
-    if (!VA.isRegLoc() && !VA.isMemLoc())
-      report_fatal_error("Unexpected location for function call argument.");
+        Bytes += N;
+        assert(LocVT.getSizeInBits() >= (Bytes * 8));
+        if (unsigned NumSHLBits = LocVT.getSizeInBits() - (Bytes * 8)) {
+          // By-val arguments are passed left-justfied in register.
+          EVT ShiftAmountTy =
+              getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
+          SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
+          SDValue ShiftedLoad =
+              DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
+          RegVal = RegVal ? DAG.getNode(ISD::OR, dl, LocVT, RegVal, ShiftedLoad)
+                          : ShiftedLoad;
+        } else {
+          assert(!RegVal && Bytes == ByValSize &&
+                 "Pass-by-value argument handling unexpectedly incomplete.");
+          RegVal = Load;
+        }
+      }
+
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), RegVal));
+      continue;
+    }
 
     switch (VA.getLocInfo()) {
     default:
@@ -7165,20 +7235,20 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     // Custom handling is used for GPR initializations for vararg float
     // arguments.
     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
-           VA.getValVT().isFloatingPoint() && VA.getLocVT().isInteger() &&
+           ValVT.isFloatingPoint() && LocVT.isInteger() &&
            "Unexpected register handling for calling convention.");
 
     SDValue ArgAsInt =
-        DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
+        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
 
-    if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
+    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
       // f32 in 32-bit GPR
       // f64 in 64-bit GPR
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
-    else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
+    else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
       // f32 in 64-bit GPR.
       RegsToPass.push_back(std::make_pair(
-          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
+          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
     else {
       // f64 in two 32-bit GPRs
       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.

diff  --git a/llvm/test/CodeGen/PowerPC/aix-byval-param.ll b/llvm/test/CodeGen/PowerPC/aix-byval-param.ll
deleted file mode 100644
index 2dfdf7f8535f..000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-byval-param.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
-
-%struct.S = type { i32, i32 }
-
-define void @bar() {
-entry:
-  %s1 = alloca %struct.S, align 4
-  %agg.tmp = alloca %struct.S, align 4
-  call void @foo(%struct.S* byval(%struct.S) align 4 %agg.tmp)
-  ret void
-}
-
-declare void @foo(%struct.S* byval(%struct.S) align 4)
-
-; CHECK: LLVM ERROR: Passing structure by value is unimplemented.

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
new file mode 100644
index 000000000000..745323ce8a5d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
@@ -0,0 +1,16 @@
+; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+
+%struct.S = type { [9 x i8] }
+
+define void @bar() {
+entry:
+  %s1 = alloca %struct.S, align 1
+  %agg.tmp = alloca %struct.S, align 1
+  call void @foo(%struct.S* byval(%struct.S) align 1 %agg.tmp)
+  ret void
+}
+
+declare void @foo(%struct.S* byval(%struct.S) align 1)
+
+; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register.

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
new file mode 100644
index 000000000000..b9c48272d147
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
@@ -0,0 +1,16 @@
+; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+
+%struct.S = type { [1 x i8] }
+
+define void @bar() {
+entry:
+  %s1 = alloca %struct.S, align 1
+  %agg.tmp = alloca %struct.S, align 1
+  call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, %struct.S* byval(%struct.S) align 1 %agg.tmp)
+  ret void
+}
+
+declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 1)
+
+; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register.

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
new file mode 100644
index 000000000000..357acfe595bb
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
@@ -0,0 +1,16 @@
+; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
+
+%struct.S = type { [1 x i8] }
+
+define void @bar() {
+entry:
+  %s1 = alloca %struct.S, align 32
+  %agg.tmp = alloca %struct.S, align 32
+  call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, %struct.S* byval(%struct.S) align 32 %agg.tmp)
+  ret void
+}
+
+declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 32)
+
+; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
new file mode 100644
index 000000000000..0f90da66d60d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
@@ -0,0 +1,206 @@
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefixes=CHECK,32BIT %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN:  -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefixes=CHECK,64BIT %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN:  -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s
+
+%struct.S1 = type { [1 x i8] }
+@gS1 = external global %struct.S1, align 1
+
+define void @call_test_byval_1Byte() {
+entry:
+  call void @test_byval_1Byte(%struct.S1* byval(%struct.S1) align 1 @gS1)
+  ret void
+}
+
+declare void @test_byval_1Byte(%struct.S1* byval(%struct.S1) align 1)
+
+; CHECK-LABEL: name: call_test_byval_1Byte{{.*}}
+
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REG:[0-9]+]] = LWZtoc @gS1, $r2 :: (load 4 from got)
+; 32BIT-NEXT:  renamable $r3 = LBZ 0, killed renamable $r[[REG]] :: (load 1)
+; 32BIT-NEXT:  renamable $r3 = RLWINM killed renamable $r3, 24, 0, 7
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_1Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_1Byte:
+
+; ASM32PWR4:       stwu 1, -64(1)
+; ASM32PWR4-NEXT:  lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32PWR4-NEXT:  lbz 3, 0([[REG]])
+; ASM32PWR4-NEXT:  slwi 3, 3, 24
+; ASM32PWR4-NEXT:  bl .test_byval_1Byte
+; ASM32PWR4-NEXT:  nop
+; ASM32PWR4-NEXT:  addi 1, 1, 64
+
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REG:[0-9]+]] = LDtoc @gS1, $x2 :: (load 8 from got)
+; 64BIT-NEXT:  renamable $x3 = LBZ8 0, killed renamable $x[[REG]] :: (load 1)
+; 64BIT-NEXT:  renamable $x3 = RLDICR killed renamable $x3, 56, 7
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_1Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:       std 0, 16(1)
+; ASM64PWR4-NEXT:  stdu 1, -112(1)
+; ASM64PWR4-NEXT:  ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-NEXT:  lbz 3, 0([[REG]])
+; ASM64PWR4-NEXT:  sldi 3, 3, 56
+; ASM64PWR4-NEXT:  bl .test_byval_1Byte
+; ASM64PWR4-NEXT:  nop
+; ASM64PWR4-NEXT:  addi 1, 1, 112
+
+%struct.S2 = type { [2 x i8] }
+
+@gS2 = external global %struct.S2, align 1
+
+define void @call_test_byval_2Byte() {
+entry:
+  call void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1 @gS2)
+  ret void
+}
+
+declare void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1)
+
+; CHECK-LABEL: name: call_test_byval_2Byte{{.*}}
+
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REG:[0-9]+]] = LWZtoc @gS2, $r2 :: (load 4 from got)
+; 32BIT-NEXT:  renamable $r3 = LHZ 0, killed renamable $r[[REG]] :: (load 2)
+; 32BIT-NEXT:  renamable $r3 = RLWINM killed renamable $r3, 16, 0, 15
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_2Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_2Byte:
+
+; ASM32PWR4:       stwu 1, -64(1)
+; ASM32PWR4-NEXT:  lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32PWR4-NEXT:  lhz 3, 0([[REG]])
+; ASM32PWR4-NEXT:  slwi 3, 3, 16
+; ASM32PWR4-NEXT:  bl .test_byval_2Byte
+; ASM32PWR4-NEXT:  nop
+; ASM32PWR4-NEXT:  addi 1, 1, 64
+
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REG:[0-9]+]] = LDtoc @gS2, $x2 :: (load 8 from got)
+; 64BIT-NEXT:  renamable $x3 = LHZ8 0, killed renamable $x[[REG]] :: (load 2)
+; 64BIT-NEXT:  renamable $x3 = RLDICR killed renamable $x3, 48, 15
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_2Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:       std 0, 16(1)
+; ASM64PWR4-NEXT:  stdu 1, -112(1)
+; ASM64PWR4-NEXT:  ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-NEXT:  lhz 3, 0([[REG]])
+; ASM64PWR4-NEXT:  sldi 3, 3, 48
+; ASM64PWR4-NEXT:  bl .test_byval_2Byte
+; ASM64PWR4-NEXT:  nop
+; ASM64PWR4-NEXT:  addi 1, 1, 112
+
+%struct.S3 = type { [3 x i8] }
+
+@gS3 = external global %struct.S3, align 1
+
+define void @call_test_byval_3Byte() {
+entry:
+  call void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1 @gS3)
+  ret void
+}
+
+declare void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1)
+
+; CHECK-LABEL: name: call_test_byval_3Byte{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS3, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $r[[REG1:[0-9]+]] = LHZ 0, killed renamable $r[[REGADDR]] :: (load 2)
+; 32BIT-DAG:   renamable $r[[REG2:[0-9]+]] = LBZ 2, renamable $r[[REGADDR]] :: (load 1)
+; 32BIT-DAG:   renamable $r3 = RLWINM killed renamable $r[[REG2]], 8, 16, 23
+; 32BIT-DAG:   renamable $r3 = RLWIMI killed renamable $r3, killed renamable $r[[REG1]], 16, 0, 15
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_3Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_3Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32PWR4:       stwu 1, -64(1)
+; ASM32PWR4-NEXT:  lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32PWR4-DAG:   lhz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM32PWR4-DAG:   lbz [[REG2:[0-9]+]], 2([[REGADDR]])
+; ASM32PWR4-DAG:   rlwinm 3, [[REG2]], 8, 16, 23
+; ASM32PWR4-DAG:   rlwimi 3, [[REG1]], 16, 0, 15
+; ASM32PWR4-NEXT:  bl .test_byval_3Byte
+; ASM32PWR4-NEXT:  nop
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-DAG:   renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS3, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x[[REG1:[0-9]+]] = LHZ8 0, killed renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG:   renamable $x[[REG2:[0-9]+]] = LBZ8 2, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG:   renamable $x3 = RLDIC killed renamable $x[[REG2]], 40, 16
+; 64BIT-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 48, 0
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_3Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4:       stdu 1, -112(1)
+; ASM64PWR4-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-DAG:   lhz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64PWR4-DAG:   lbz [[REG2:[0-9]+]], 2([[REGADDR]])
+; ASM64PWR4-DAG:   rldic 3, [[REG2]], 40, 16
+; ASM64PWR4-DAG:   rldimi 3, [[REG1]], 48, 0
+; ASM64PWR4-NEXT:  bl .test_byval_3Byte
+; ASM64PWR4-NEXT:  nop
+
+%struct.S4 = type { [4 x i8] }
+
+@gS4 = external global %struct.S4, align 1
+
+define void @call_test_byval_4Byte() {
+entry:
+  call void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1 @gS4)
+  ret void
+}
+
+declare void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1)
+
+; CHECK-LABEL: name: call_test_byval_4Byte{{.*}}
+
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REG:[0-9]+]] = LWZtoc @gS4, $r2 :: (load 4 from got)
+; 32BIT-NEXT:  renamable $r3 = LWZ 0, killed renamable $r[[REG]] :: (load 4)
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_4Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_4Byte:
+
+; ASM32PWR4:       stwu 1, -64(1)
+; ASM32PWR4-NEXT:  lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32PWR4-NEXT:  lwz 3, 0([[REG]])
+; ASM32PWR4-NEXT:  bl .test_byval_4Byte
+; ASM32PWR4-NEXT:  nop
+; ASM32PWR4-NEXT:  addi 1, 1, 64
+
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REG:[0-9]+]] = LDtoc @gS4, $x2 :: (load 8 from got)
+; 64BIT-NEXT:  renamable $x3 = LWZ8 0, killed renamable $x[[REG]] :: (load 4)
+; 64BIT-NEXT:  renamable $x3 = RLDICR killed renamable $x3, 32, 31
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_4Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:       stdu 1, -112(1)
+; ASM64PWR4-NEXT:  ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-NEXT:  lwz 3, 0([[REG]])
+; ASM64PWR4-NEXT:  sldi 3, 3, 32
+; ASM64PWR4-NEXT:  bl .test_byval_4Byte
+; ASM64PWR4-NEXT:  nop
+; ASM64PWR4-NEXT:  addi 1, 1, 112

diff  --git a/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll
new file mode 100644
index 000000000000..599ab13530b8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll
@@ -0,0 +1,146 @@
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefixes=CHECK,64BIT %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN:  -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s
+
+%struct.S5 = type { [5 x i8] }
+
+@gS5 = external global %struct.S5, align 1
+
+define void @call_test_byval_5Byte() {
+entry:
+  call void @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1 @gS5)
+  ret void
+}
+
+declare void @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1)
+
+; CHECK-LABEL: name: call_test_byval_5Byte{{.*}}
+
+; CHECKASM-LABEL: .call_test_byval_5Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS5, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG:   renamable $x[[REG2:[0-9]+]] = LBZ8 4, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 24, 0, 7
+; 64BIT-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_5Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4:       stdu 1, -112(1)
+; ASM64PWR4-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64PWR4-DAG:   lbz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64PWR4-DAG:   rlwinm 3, [[REG2]], 24, 0, 7
+; ASM64PWR4-DAG:   rldimi 3, [[REG1]], 32, 0
+; ASM64PWR4-NEXT:  bl .test_byval_5Byte
+; ASM64PWR4-NEXT:  nop
+
+%struct.S6 = type { [6 x i8] }
+
+@gS6 = external global %struct.S6, align 1
+
+define void @call_test_byval_6Byte() {
+entry:
+  call void @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1 @gS6)
+  ret void
+}
+
+declare void @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1)
+
+; CHECK-LABEL: name: call_test_byval_6Byte{{.*}}
+
+; CHECKASM-LABEL: .call_test_byval_6Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS6, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG:   renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 16, 0, 15
+; 64BIT-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_6Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4:       stdu 1, -112(1)
+; ASM64PWR4-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64PWR4-DAG:   lhz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64PWR4-DAG:   rlwinm 3, [[REG2]], 16, 0, 15
+; ASM64PWR4-DAG:   rldimi 3, [[REG1]], 32, 0
+; ASM64PWR4-NEXT:  bl .test_byval_6Byte
+; ASM64PWR4-NEXT:  nop
+
+%struct.S7 = type { [7 x i8] }
+
+@gS7 = external global %struct.S7, align 1
+
+define void @call_test_byval_7Byte() {
+entry:
+  call void @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1 @gS7)
+  ret void
+}
+
+declare void @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1)
+
+; CHECK-LABEL: name: call_test_byval_7Byte{{.*}}
+
+; CHECKASM-LABEL: .call_test_byval_7Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS7, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG:   renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG:   renamable $x[[REG3:[0-9]+]] = LBZ8 6, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23
+; 64BIT-DAG:   renamable $x3 = RLWIMI8 killed renamable $x3, killed renamable $x[[REG2]], 16, 0, 15
+; 64BIT-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_7Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4:       stdu 1, -112(1)
+; ASM64PWR4-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64PWR4-DAG:   lhz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64PWR4-DAG:   lbz [[REG3:[0-9]+]], 6([[REGADDR]])
+; ASM64PWR4-DAG:   rlwinm 3, [[REG3]], 8, 16, 23
+; ASM64PWR4-DAG:   rlwimi 3, [[REG2]], 16, 0, 15
+; ASM64PWR4-DAG:   rldimi 3, [[REG1]], 32, 0
+; ASM64PWR4-NEXT:  bl .test_byval_7Byte
+; ASM64PWR4-NEXT:  nop
+
+%struct.S8 = type { [8 x i8] }
+
+@gS8 = external global %struct.S8, align 1
+
+define void @call_test_byval_8Byte() {
+entry:
+  call void @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1 @gS8)
+  ret void
+}
+
+declare void @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1)
+
+; CHECK-LABEL: name: call_test_byval_8Byte{{.*}}
+
+; CHECKASM-LABEL: .call_test_byval_8Byte:
+
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS8, $x2 :: (load 8 from got)
+; 64BIT-NEXT:  renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_8Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:       stdu 1, -112(1)
+; ASM64PWR4-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64PWR4-NEXT:  ld 3, 0([[REGADDR]])
+; ASM64PWR4-NEXT:  bl .test_byval_8Byte
+; ASM64PWR4-NEXT:  nop


        


More information about the llvm-commits mailing list