[llvm] d6ea82d - [AIX][PPC] Implement by-val caller arguments in multiple registers

Chris Bowler via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 6 08:07:08 PDT 2020


Author: Chris Bowler
Date: 2020-04-06T11:06:51-04:00
New Revision: d6ea82d11c6227f63c464258b0c699f16de09abe

URL: https://github.com/llvm/llvm-project/commit/d6ea82d11c6227f63c464258b0c699f16de09abe
DIFF: https://github.com/llvm/llvm-project/commit/d6ea82d11c6227f63c464258b0c699f16de09abe.diff

LOG: [AIX][PPC] Implement by-val caller arguments in multiple registers

Differential Revision: https://reviews.llvm.org/D76380

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
    llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
    llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
    llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 16b818ed1bf8..3d8441907c04 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6866,16 +6866,16 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
       return false;
     }
 
-    if (ByValSize <= PtrByteSize) {
-      State.AllocateStack(PtrByteSize, PtrByteSize);
-      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+    State.AllocateStack(alignTo(ByValSize, PtrByteSize), PtrByteSize);
+
+    for (unsigned I = 0, E = ByValSize; I < E; I += PtrByteSize) {
+      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
-        return false;
-      }
+      else
+        report_fatal_error(
+            "Pass-by-value arguments are only supported in registers.");
     }
-
-    report_fatal_error(
-        "Pass-by-value arguments are only supported in a single register.");
+    return false;
   }
 
   // Arguments always reserve parameter save area.
@@ -7222,68 +7222,91 @@ SDValue PPCTargetLowering::LowerCall_AIX(
                                    : DAG.getRegister(PPC::R1, MVT::i32);
 
   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
-    CCValAssign &VA = ArgLocs[I++];
-
-    SDValue Arg = OutVals[VA.getValNo()];
-    ISD::ArgFlagsTy Flags = Outs[VA.getValNo()].Flags;
-    const MVT LocVT = VA.getLocVT();
-    const MVT ValVT = VA.getValVT();
+    const unsigned ValNo = ArgLocs[I].getValNo();
+    SDValue Arg = OutVals[ValNo];
+    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
 
     if (Flags.isByVal()) {
       const unsigned ByValSize = Flags.getByValSize();
 
       // Nothing to do for zero-sized ByVals on the caller side.
-      if (!ByValSize)
+      if (!ByValSize) {
+        ++I;
         continue;
+      }
+
+      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
+        return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
+                              (LoadOffset != 0)
+                                  ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
+                                  : Arg,
+                              MachinePointerInfo(), VT);
+      };
 
-      assert(
-          VA.isRegLoc() && ByValSize <= PtrByteSize &&
-          "Pass-by-value arguments are only supported in a single register.");
+      unsigned LoadOffset = 0;
 
-      // Loads must be a power-of-2 size and cannot be larger than the
-      // ByValSize. For example: a 7 byte by-val arg requires 4, 2 and 1 byte
-      // loads.
-      SDValue RegVal;
-      for (unsigned Bytes = 0; Bytes != ByValSize;) {
-        unsigned N = PowerOf2Floor(ByValSize - Bytes);
+      // Initialize registers that are fully occupied by the by-val argument.
+      while (I != E && LoadOffset + PtrByteSize <= ByValSize) {
+        SDValue Load = GetLoad(PtrVT, LoadOffset);
+        MemOpChains.push_back(Load.getValue(1));
+        LoadOffset += PtrByteSize;
+        const CCValAssign &ByValVA = ArgLocs[I++];
+        assert(ByValVA.isRegLoc() && ByValVA.getValNo() == ValNo &&
+               "Unexpected location for pass-by-value argument.");
+        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
+      }
+
+      if (LoadOffset == ByValSize)
+        continue;
+
+      const unsigned ResidueBytes = ByValSize % PtrByteSize;
+      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
+             "Unexpected register residue for by-value argument.");
+
+      // Initialize the final register residue.
+      // Any residue that occupies the final by-val arg register must be
+      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
+      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
+      // 2 and 1 byte loads.
+      SDValue ResidueVal;
+      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
+        const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
         const MVT VT =
             N == 1 ? MVT::i8
                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
-
-        SDValue LoadAddr = Arg;
-        if (Bytes != 0) {
-          // Adjust the load offset by the number of bytes read so far.
-          SDNodeFlags Flags;
-          Flags.setNoUnsignedWrap(true);
-          LoadAddr = DAG.getNode(ISD::ADD, dl, LocVT, Arg,
-                                 DAG.getConstant(Bytes, dl, LocVT), Flags);
-        }
-        SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, LoadAddr,
-                                      MachinePointerInfo(), VT);
+        SDValue Load = GetLoad(VT, LoadOffset);
         MemOpChains.push_back(Load.getValue(1));
-
+        LoadOffset += N;
         Bytes += N;
-        assert(LocVT.getSizeInBits() >= (Bytes * 8));
-        if (unsigned NumSHLBits = LocVT.getSizeInBits() - (Bytes * 8)) {
-          // By-val arguments are passed left-justfied in register.
-          EVT ShiftAmountTy =
-              getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
-          SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
-          SDValue ShiftedLoad =
-              DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
-          RegVal = RegVal ? DAG.getNode(ISD::OR, dl, LocVT, RegVal, ShiftedLoad)
-                          : ShiftedLoad;
-        } else {
-          assert(!RegVal && Bytes == ByValSize &&
-                 "Pass-by-value argument handling unexpectedly incomplete.");
-          RegVal = Load;
-        }
+
+        // By-val arguments are passed left-justified in register.
+        // Every load here needs to be shifted, otherwise a full register load
+        // should have been used.
+        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
+               "Unexpected load emitted during handling of pass-by-value "
+               "argument.");
+        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
+        EVT ShiftAmountTy =
+            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
+        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
+        SDValue ShiftedLoad =
+            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
+        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
+                                              ShiftedLoad)
+                                : ShiftedLoad;
       }
 
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), RegVal));
+      const CCValAssign &ByValVA = ArgLocs[I++];
+      assert(ByValVA.isRegLoc() && ByValVA.getValNo() == ValNo &&
+             "Additional register location expected for by-value argument.");
+      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
       continue;
     }
 
+    CCValAssign &VA = ArgLocs[I++];
+    const MVT LocVT = VA.getLocVT();
+    const MVT ValVT = VA.getValVT();
+
     switch (VA.getLocInfo()) {
     default:
       report_fatal_error("Unexpected argument extension type.");

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
index 745323ce8a5d..d7815e7bf4e1 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll
@@ -1,7 +1,7 @@
 ; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
 ; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
 
-%struct.S = type { [9 x i8] }
+%struct.S = type { [65 x i8] }
 
 define void @bar() {
 entry:
@@ -13,4 +13,4 @@ entry:
 
 declare void @foo(%struct.S* byval(%struct.S) align 1)
 
-; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register.
+; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in registers.

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
index b9c48272d147..37700f251429 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll
@@ -13,4 +13,4 @@ entry:
 
 declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 1)
 
-; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register.
+; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in registers.

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
index fd86a60ce490..6ff8fffdc2d1 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
@@ -455,3 +455,417 @@ entry:
 ; ASM64-NEXT:   add [[SCRATCH3:[0-9]+]], [[SCRATCH2]], [[SCRATCH1]]
 ; ASM64-NEXT:   extsw 3, [[SCRATCH3]]
 ; ASM64-NEXT:   blr
+
+
+%struct.S5 = type { [5 x i8] }
+
+@gS5 = external global %struct.S5, align 1
+
+define void @call_test_byval_5Byte() {
+entry:
+  %call = call zeroext i8 @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1 @gS5)
+  ret void
+}
+
+declare zeroext i8 @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1)
+
+; CHECK-LABEL: name: call_test_byval_5Byte{{.*}}
+
+; ASM-LABEL: .call_test_byval_5Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS5, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $r[[REG1:[0-9]+]] = LBZ 4, renamable $r[[REGADDR]] :: (load 1)
+; 32BIT-DAG:   renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r4 = RLWINM killed renamable $r[[REG1]], 24, 0, 7
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_5Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:   ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_5Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32:       stwu 1, -64(1)
+; ASM32-NEXT:  lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32-DAG:   lbz [[REG1:[0-9]+]], 4([[REGADDR]])
+; ASM32-DAG:   lwz 3, 0([[REGADDR]])
+; ASM32-DAG:   slwi 4, [[REG1]], 24
+; ASM32-NEXT:  bl .test_byval_5Byte
+; ASM32-NEXT:  nop
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS5, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG:   renamable $x[[REG2:[0-9]+]] = LBZ8 4, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 24, 0, 7
+; 64BIT-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_5Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64:       stdu 1, -112(1)
+; ASM64-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64-DAG:   lbz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64-DAG:   rlwinm 3, [[REG2]], 24, 0, 7
+; ASM64-DAG:   rldimi 3, [[REG1]], 32, 0
+; ASM64-NEXT:  bl .test_byval_5Byte
+; ASM64-NEXT:  nop
+
+
+%struct.S6 = type { [6 x i8] }
+
+@gS6 = external global %struct.S6, align 1
+
+define void @call_test_byval_6Byte() {
+entry:
+  %call = call zeroext i8 @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1 @gS6)
+  ret void
+}
+
+declare zeroext i8 @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1)
+
+; CHECK-LABEL: name: call_test_byval_6Byte{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS6, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $r[[REG1:[0-9]+]] = LHZ 4, renamable $r[[REGADDR]] :: (load 2)
+; 32BIT-DAG:   renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r4 = RLWINM killed renamable $r[[REG1]], 16, 0, 15
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_6Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_6Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32:       stwu 1, -64(1)
+; ASM32-NEXT:  lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32-DAG:   lhz [[REG1:[0-9]+]], 4([[REGADDR]])
+; ASM32-DAG:   lwz 3, 0([[REGADDR]])
+; ASM32-DAG:   slwi 4, [[REG1]], 16
+; ASM32-NEXT:  bl .test_byval_6Byte
+; ASM32-NEXT:  nop
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS6, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG:   renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 16, 0, 15
+; 64BIT-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_6Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64:       stdu 1, -112(1)
+; ASM64-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64-DAG:   lhz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64-DAG:   rlwinm 3, [[REG2]], 16, 0, 15
+; ASM64-DAG:   rldimi 3, [[REG1]], 32, 0
+; ASM64-NEXT:  bl .test_byval_6Byte
+; ASM64-NEXT:  nop
+
+
+%struct.S7 = type { [7 x i8] }
+
+@gS7 = external global %struct.S7, align 1
+
+define void @call_test_byval_7Byte() {
+entry:
+  %call = call zeroext i8 @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1 @gS7)
+  ret void
+}
+
+declare zeroext i8 @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1)
+
+; CHECK-LABEL: name: call_test_byval_7Byte{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS7, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r[[REG1:[0-9]+]] = LHZ 4, renamable $r[[REGADDR]] :: (load 2)
+; 32BIT-DAG:   renamable $r[[REG2:[0-9]+]] = LBZ 6, renamable $r[[REGADDR]] :: (load 1)
+; 32BIT-DAG:   renamable $r4 = RLWINM killed renamable $r[[REG2]], 8, 16, 23
+; 32BIT-DAG:   renamable $r4 = RLWIMI killed renamable $r4, killed renamable $r[[REG1]], 16, 0, 15
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_7Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_7Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32:       stwu 1, -64(1)
+; ASM32-NEXT:  lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32-DAG:   lwz 3, 0([[REGADDR]])
+; ASM32-DAG:   lhz [[REG1:[0-9]+]], 4([[REGADDR]])
+; ASM32-DAG:   lbz [[REG2:[0-9]+]], 6([[REGADDR]])
+; ASM32-DAG:   rlwinm 4, [[REG2]], 8, 16, 23
+; ASM32-DAG:   rlwimi 4, [[REG1]], 16, 0, 15
+; ASM32-NEXT:  bl .test_byval_7Byte
+; ASM32-NEXT:  nop
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS7, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG:   renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG:   renamable $x[[REG3:[0-9]+]] = LBZ8 6, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23
+; 64BIT-DAG:   renamable $x3 = RLWIMI8 killed renamable $x3, killed renamable $x[[REG2]], 16, 0, 15
+; 64BIT-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_7Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64:       stdu 1, -112(1)
+; ASM64-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
+; ASM64-DAG:   lhz [[REG2:[0-9]+]], 4([[REGADDR]])
+; ASM64-DAG:   lbz [[REG3:[0-9]+]], 6([[REGADDR]])
+; ASM64-DAG:   rlwinm 3, [[REG3]], 8, 16, 23
+; ASM64-DAG:   rlwimi 3, [[REG2]], 16, 0, 15
+; ASM64-DAG:   rldimi 3, [[REG1]], 32, 0
+; ASM64-NEXT:  bl .test_byval_7Byte
+; ASM64-NEXT:  nop
+
+
+%struct.S8 = type { [8 x i8] }
+
+@gS8 = external global %struct.S8, align 1
+
+define void @call_test_byval_8Byte() {
+entry:
+  %call = call zeroext i8 @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1 @gS8)
+  ret void
+}
+
+declare zeroext i8 @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1)
+
+; CHECK-LABEL: name: call_test_byval_8Byte{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS8, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_8Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_8Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32:       stwu 1, -64(1)
+; ASM32-NEXT:  lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32-DAG:   lwz 3, 0([[REGADDR]])
+; ASM32-DAG:   lwz 4, 4([[REGADDR]])
+; ASM32-NEXT:  bl .test_byval_8Byte
+; ASM32-NEXT:  nop
+
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS8, $x2 :: (load 8 from got)
+; 64BIT-NEXT:  renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_8Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64:       stdu 1, -112(1)
+; ASM64-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64-NEXT:  ld 3, 0([[REGADDR]])
+; ASM64-NEXT:  bl .test_byval_8Byte
+; ASM64-NEXT:  nop
+
+
+%struct.S32 = type { [32 x i8] }
+
+@gS32 = external global %struct.S32, align 1
+
+define void @call_test_byval_32Byte() {
+entry:
+  %call = call zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 @gS32)
+  ret void
+}
+
+declare zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s)
+
+; CHECK-LABEL: name: call_test_byval_32Byte{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS32, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r5 = LWZ 8, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r6 = LWZ 12, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r7 = LWZ 16, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r8 = LWZ 20, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r9 = LWZ 24, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r10 = LWZ 28, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_32Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_32Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32:       stwu 1, -64(1)
+; ASM32-NEXT:  lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32-DAG:   lwz 3, 0([[REGADDR]])
+; ASM32-DAG:   lwz 4, 4([[REGADDR]])
+; ASM32-DAG:   lwz 5, 8([[REGADDR]])
+; ASM32-DAG:   lwz 6, 12([[REGADDR]])
+; ASM32-DAG:   lwz 7, 16([[REGADDR]])
+; ASM32-DAG:   lwz 8, 20([[REGADDR]])
+; ASM32-DAG:   lwz 9, 24([[REGADDR]])
+; ASM32-DAG:   lwz 10, 28([[REGADDR]])
+; ASM32-NEXT:  bl .test_byval_32Byte
+; ASM32-NEXT:  nop
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS32, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-DAG:   renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-DAG:   renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-DAG:   renamable $x6 = LD 24, renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_32Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64:       stdu 1, -112(1)
+; ASM64-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64-DAG:   ld 3, 0([[REGADDR]])
+; ASM64-DAG:   ld 4, 8([[REGADDR]])
+; ASM64-DAG:   ld 5, 16([[REGADDR]])
+; ASM64-DAG:   ld 6, 24([[REGADDR]])
+; ASM64-NEXT:  bl .test_byval_32Byte
+; ASM64-NEXT:  nop
+
+
+%struct.S31 = type { [31 x i8] }
+
+@gS31 = external global %struct.S31, align 1
+
+define void @call_test_byval_31Byte() {
+entry:
+  %call = call zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 @gS31)
+  ret void
+}
+
+declare zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1)
+
+; CHECK-LABEL: name: call_test_byval_31Byte{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT:  renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS31, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r5 = LWZ 8, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r6 = LWZ 12, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r7 = LWZ 16, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r8 = LWZ 20, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r9 = LWZ 24, renamable $r[[REGADDR]] :: (load 4)
+; 32BIT-DAG:   renamable $r[[REG:[0-9]+]] = LHZ 28, renamable $r[[REGADDR]] :: (load 2)
+; 32BIT-DAG:   renamable $r10 = LBZ 30, renamable $r[[REGADDR]] :: (load 1)
+; 32BIT-DAG:   renamable $r10 = RLWINM killed renamable $r10, 8, 16, 23
+; 32BIT-DAG:   renamable $r10 = RLWIMI killed renamable $r10, killed renamable $r[[REG]], 16, 0, 15
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_31Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_31Byte:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32:       stwu 1, -64(1)
+; ASM32-NEXT:  lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM32-DAG:   lwz 3, 0([[REGADDR]])
+; ASM32-DAG:   lwz 4, 4([[REGADDR]])
+; ASM32-DAG:   lwz 5, 8([[REGADDR]])
+; ASM32-DAG:   lwz 6, 12([[REGADDR]])
+; ASM32-DAG:   lwz 7, 16([[REGADDR]])
+; ASM32-DAG:   lwz 8, 20([[REGADDR]])
+; ASM32-DAG:   lwz 9, 24([[REGADDR]])
+; ASM32-DAG:   lbz 10, 30([[REGADDR]])
+; ASM32-DAG:   lhz [[REG:[0-9]+]], 28([[REGADDR]])
+; ASM32-DAG:   rlwinm 10, 10, 8, 16, 23
+; ASM32-DAG:   rlwimi 10, [[REG]], 16, 0, 15
+; ASM32-NEXT:  bl .test_byval_31Byte
+; ASM32-NEXT:  nop
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS31, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-DAG:   renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-DAG:   renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load 8)
+; 64BIT-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 24, renamable $x[[REGADDR]] :: (load 4)
+; 64BIT-DAG:   renamable $x[[REG2:[0-9]+]] = LHZ8 28, renamable $x[[REGADDR]] :: (load 2)
+; 64BIT-DAG:   renamable $x[[REG3:[0-9]+]] = LBZ8 30, renamable $x[[REGADDR]] :: (load 1)
+; 64BIT-DAG:   renamable $x6 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23
+; 64BIT-DAG:   renamable $x6 = RLWIMI8 killed renamable $x6, killed renamable $x[[REG2]], 16, 0, 15
+; 64BIT-DAG:   renamable $x6 = RLDIMI killed renamable $x6, killed renamable $x[[REG1]], 32, 0
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_31Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64:       stdu 1, -112(1)
+; ASM64-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM64-DAG:   ld 3, 0([[REGADDR]])
+; ASM64-DAG:   ld 4, 8([[REGADDR]])
+; ASM64-DAG:   ld 5, 16([[REGADDR]])
+; ASM64-DAG:   lwz [[REG1:[0-9]+]], 24([[REGADDR]])
+; ASM64-DAG:   lhz [[REG2:[0-9]+]], 28([[REGADDR]])
+; ASM64-DAG:   lbz [[REG3:[0-9]+]], 30([[REGADDR]])
+; ASM64-DAG:   rlwinm 6, [[REG3]], 8, 16, 23
+; ASM64-DAG:   rlwimi 6, [[REG2]], 16, 0, 15
+; ASM64-DAG:   rldimi 6, [[REG1]], 32, 0
+; ASM64-NEXT:  bl .test_byval_31Byte
+; ASM64-NEXT:  nop
+
+
+%struct.F = type { float, float, float }
+
+define i32 @call_test_byval_homogeneous_float_struct() {
+entry:
+  %s = alloca %struct.F, align 4
+  %0 = bitcast %struct.F* %s to i8*
+  call void @llvm.memset.p0i8.i32(i8* align 4 %0, i8 0, i32 12, i1 false)
+  %call = call i32 @test_byval_homogeneous_float_struct(%struct.F* byval(%struct.F) align 4 %s)
+  ret i32 %call
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
+
+declare i32 @test_byval_homogeneous_float_struct(%struct.F* byval(%struct.F) align 4)
+
+; CHECK-LABEL: name: call_test_byval_homogeneous_float_struct{{.*}}
+
+; 32BIT:       ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-DAG:   renamable $r3 = LWZ 0, %stack.0.s :: (load 4 from %stack.0.s, align 8)
+; 32BIT-DAG:   renamable $r4 = LWZ 4, %stack.0.s :: (load 4 from %stack.0.s + 4)
+; 32BIT-DAG:   renamable $r5 = LWZ 8, %stack.0.s :: (load 4 from %stack.0.s + 8, align 8)
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_byval_homogeneous_float_struct>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
+; 32BIT-NEXT:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_byval_homogeneous_float_struct:
+
+; ASM32:       stwu 1, -80(1)
+; ASM32-DAG:   lwz 3, 64(1)
+; ASM32-DAG:   lwz 4, 68(1)
+; ASM32-DAG:   lwz 5, 72(1)
+; ASM32-NEXT:  bl .test_byval_homogeneous_float_struct
+; ASM32-NEXT:  nop
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-DAG:   renamable $x3 = LD 0, %stack.0.s :: (load 8 from %stack.0.s)
+; 64BIT-DAG:   renamable $x4 = LWZ8 8, %stack.0.s :: (load 4 from %stack.0.s + 8, align 8)
+; 64BIT-DAG:   renamable $x4 = RLDICR killed renamable $x4, 32, 31
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_homogeneous_float_struct>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x2, implicit-def $r1, implicit-def $x3
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64:       stdu 1, -128(1)
+; ASM64-DAG:   ld 3, 112(1)
+; ASM64-DAG:   lwz 4, 120(1)
+; ASM64-DAG:   sldi 4, 4, 32
+; ASM64-NEXT:  bl .test_byval_homogeneous_float_struct
+; ASM64-NEXT:  nop

diff  --git a/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll
index 6f2384ff8b29..ed84cdd1e781 100644
--- a/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll
@@ -8,39 +8,6 @@
 
 %struct.S5 = type { [5 x i8] }
 
-@gS5 = external global %struct.S5, align 1
-
-define void @call_test_byval_5Byte() {
-entry:
-  %call = call zeroext i8 @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1 @gS5)
-  ret void
-}
-
-; CHECK-LABEL: name: call_test_byval_5Byte{{.*}}
-
-; ASM-LABEL: .call_test_byval_5Byte:
-
-; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
-; CHECK:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
-; CHECK-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS5, $x2 :: (load 8 from got)
-; CHECK-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
-; CHECK-DAG:   renamable $x[[REG2:[0-9]+]] = LBZ8 4, renamable $x[[REGADDR]] :: (load 1)
-; CHECK-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 24, 0, 7
-; CHECK-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
-; CHECK-NEXT:  BL8_NOP <mcsymbol .test_byval_5Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
-; CHECK-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
-
-; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
-; ASM:       stdu 1, -112(1)
-; ASM-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
-; ASM-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
-; ASM-DAG:   lbz [[REG2:[0-9]+]], 4([[REGADDR]])
-; ASM-DAG:   rlwinm 3, [[REG2]], 24, 0, 7
-; ASM-DAG:   rldimi 3, [[REG1]], 32, 0
-; ASM-NEXT:  bl .test_byval_5Byte
-; ASM-NEXT:  nop
-
-
 define zeroext i8 @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1 %s) {
 entry:
   %arrayidx = getelementptr inbounds %struct.S5, %struct.S5* %s, i32 0, i32 0, i32 4
@@ -66,39 +33,6 @@ entry:
 
 %struct.S6 = type { [6 x i8] }
 
-@gS6 = external global %struct.S6, align 1
-
-define void @call_test_byval_6Byte() {
-entry:
-  %call = call zeroext i8 @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1 @gS6)
-  ret void
-}
-
-; CHECK-LABEL: name: call_test_byval_6Byte{{.*}}
-
-; ASM-LABEL: .call_test_byval_6Byte:
-
-; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
-; CHECK:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
-; CHECK-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS6, $x2 :: (load 8 from got)
-; CHECK-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
-; CHECK-DAG:   renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
-; CHECK-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 16, 0, 15
-; CHECK-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
-; CHECK-NEXT:  BL8_NOP <mcsymbol .test_byval_6Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
-; CHECK-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
-
-; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
-; ASM:       stdu 1, -112(1)
-; ASM-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
-; ASM-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
-; ASM-DAG:   lhz [[REG2:[0-9]+]], 4([[REGADDR]])
-; ASM-DAG:   rlwinm 3, [[REG2]], 16, 0, 15
-; ASM-DAG:   rldimi 3, [[REG1]], 32, 0
-; ASM-NEXT:  bl .test_byval_6Byte
-; ASM-NEXT:  nop
-
-
 define zeroext i8 @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1 %s) {
 entry:
   %arrayidx = getelementptr inbounds %struct.S6, %struct.S6* %s, i32 0, i32 0, i32 5
@@ -121,44 +55,8 @@ entry:
 ; ASM-NEXT:  lbz 3, 53(1)
 ; ASM-NEXT:  blr
 
-%struct.S7 = type { [7 x i8] }
-
-@gS7 = external global %struct.S7, align 1
-
-define void @call_test_byval_7Byte() {
-entry:
-  %call = call zeroext i8 @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1 @gS7)
-  ret void
-}
-
-; CHECK-LABEL: name: call_test_byval_7Byte{{.*}}
-
-; ASM-LABEL: .call_test_byval_7Byte:
-
-; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
-; CHECK:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
-; CHECK-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS7, $x2 :: (load 8 from got)
-; CHECK-DAG:   renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
-; CHECK-DAG:   renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
-; CHECK-DAG:   renamable $x[[REG3:[0-9]+]] = LBZ8 6, renamable $x[[REGADDR]] :: (load 1)
-; CHECK-DAG:   renamable $x3 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23
-; CHECK-DAG:   renamable $x3 = RLWIMI8 killed renamable $x3, killed renamable $x[[REG2]], 16, 0, 15
-; CHECK-DAG:   renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
-; CHECK-NEXT:  BL8_NOP <mcsymbol .test_byval_7Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
-; CHECK-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
-
-; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
-; ASM:       stdu 1, -112(1)
-; ASM-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
-; ASM-DAG:   lwz [[REG1:[0-9]+]], 0([[REGADDR]])
-; ASM-DAG:   lhz [[REG2:[0-9]+]], 4([[REGADDR]])
-; ASM-DAG:   lbz [[REG3:[0-9]+]], 6([[REGADDR]])
-; ASM-DAG:   rlwinm 3, [[REG3]], 8, 16, 23
-; ASM-DAG:   rlwimi 3, [[REG2]], 16, 0, 15
-; ASM-DAG:   rldimi 3, [[REG1]], 32, 0
-; ASM-NEXT:  bl .test_byval_7Byte
-; ASM-NEXT:  nop
 
+%struct.S7 = type { [7 x i8] }
 
 define zeroext i8 @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1 %s) {
 entry:
@@ -185,31 +83,6 @@ entry:
 
 %struct.S8 = type { [8 x i8] }
 
-@gS8 = external global %struct.S8, align 1
-
-define void @call_test_byval_8Byte() {
-entry:
-  %call = call zeroext i8 @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1 @gS8)
-  ret void
-}
-
-; CHECK-LABEL: name: call_test_byval_8Byte{{.*}}
-
-; ASM-LABEL: .call_test_byval_8Byte:
-
-; CHECK:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
-; CHECK-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS8, $x2 :: (load 8 from got)
-; CHECK-NEXT:  renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8)
-; CHECK-NEXT:  BL8_NOP <mcsymbol .test_byval_8Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
-; CHECK-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
-
-; ASM:       stdu 1, -112(1)
-; ASM-NEXT:  ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
-; ASM-NEXT:  ld 3, 0([[REGADDR]])
-; ASM-NEXT:  bl .test_byval_8Byte
-; ASM-NEXT:  nop
-
-
 define zeroext i8 @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1 %s) {
 entry:
   %arrayidx = getelementptr inbounds %struct.S8, %struct.S8* %s, i32 0, i32 0, i32 7
@@ -234,3 +107,47 @@ entry:
 ; ASM-DAG:   clrldi  3, 3, 56
 ; ASM-DAG:   std [[SCRATCH]], 48(1)
 ; ASM-NEXT:  blr
+
+
+%struct.S64 = type { [64 x i8] }
+
+@gS64 = external global %struct.S64, align 1
+
+define void @call_test_byval_64Byte() {
+entry:
+  call void @test_byval_64Byte(%struct.S64* byval(%struct.S64) align 1 @gS64)
+  ret void
+}
+
+declare void @test_byval_64Byte(%struct.S64* byval(%struct.S64) align 1)
+
+; CHECK-LABEL: name: call_test_byval_64Byte{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; CHECK:       ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; CHECK-NEXT:  renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS64, $x2 :: (load 8 from got)
+; CHECK-DAG:   renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8)
+; CHECK-DAG:   renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load 8)
+; CHECK-DAG:   renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load 8)
+; CHECK-DAG:   renamable $x6 = LD 24, renamable $x[[REGADDR]] :: (load 8)
+; CHECK-DAG:   renamable $x7 = LD 32, renamable $x[[REGADDR]] :: (load 8)
+; CHECK-DAG:   renamable $x8 = LD 40, renamable $x[[REGADDR]] :: (load 8)
+; CHECK-DAG:   renamable $x9 = LD 48, renamable $x[[REGADDR]] :: (load 8)
+; CHECK-DAG:   renamable $x10 = LD 56, renamable $x[[REGADDR]] :: (load 8)
+; CHECK-NEXT:  BL8_NOP <mcsymbol .test_byval_64Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1
+; CHECK-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM-LABEL: .call_test_byval_64Byte:
+
+; ASM:         stdu 1, -112(1)
+; ASM-NEXT:    ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
+; ASM-DAG:     ld 3, 0([[REG]])
+; ASM-DAG:     ld 4, 8([[REG]])
+; ASM-DAG:     ld 5, 16([[REG]])
+; ASM-DAG:     ld 6, 24([[REG]])
+; ASM-DAG:     ld 7, 32([[REG]])
+; ASM-DAG:     ld 8, 40([[REG]])
+; ASM-DAG:     ld 9, 48([[REG]])
+; ASM-DAG:     ld 10, 56([[REG]])
+; ASM-NEXT:    bl .test_byval_64Byte
+; ASM-NEXT:    nop


        


More information about the llvm-commits mailing list