[llvm] aaeffbe - [PowerPC][AIX] Handle variadic vector formal arguments.

Sean Fertile via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 4 07:58:32 PST 2021


Author: Sean Fertile
Date: 2021-03-04T10:56:53-05:00
New Revision: aaeffbe00736e53922e39227268d5178f8dd10be

URL: https://github.com/llvm/llvm-project/commit/aaeffbe00736e53922e39227268d5178f8dd10be
DIFF: https://github.com/llvm/llvm-project/commit/aaeffbe00736e53922e39227268d5178f8dd10be.diff

LOG: [PowerPC][AIX] Handle variadic vector formal arguments.

Patch adds support for passing vector arguments to variadic functions.
Arguments which are fixed shadow GPRs and stack space even when they are
passed in vector registers, while arguments passed through ellipses are
passed in properly aligned GPRs if available, and on the stack once all
GPR argument registers are consumed.

Differential Revision: https://reviews.llvm.org/D97485
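
To make the two cases concrete, here is a minimal sketch of user code
(hypothetical declarations, not taken from the patch) assuming the AltiVec
extension (-maltivec or -mcpu=pwr7 as in the tests) and the AIX extended
vector ABI (-vec-extabi). Note the patch implements the callee side; the
split-register caller case still reports an error, as the caller-split test
below checks.

    #include <altivec.h>

    // 'vsi' is a fixed (named) argument of a variadic function: it is passed
    // in a vector register when one is free, but still shadows GPRs and
    // parameter save area space.
    extern double fixed_vec(int count, vector int vsi, double next, ...);

    // A vector passed through the ellipsis does not use a VR: it goes into
    // properly aligned GPRs if enough remain, otherwise fully on the stack.
    extern vector int vararg_vec(int count, ...);

    int use(void) {
      vector int v = {1, 2, 3, 4};
      fixed_vec(1, v, 0.0);   // v in a VR; GPR and stack shadows consumed.
      vararg_vec(1, v);       // v in aligned GPRs or on the stack.
      return 0;
    }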

Added: 
    llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll
    llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll
    llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll
    llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll
    llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll
    llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCCCState.h
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCCCState.h b/llvm/lib/Target/PowerPC/PPCCCState.h
index e3499597474c..b0e50b230fb1 100644
--- a/llvm/lib/Target/PowerPC/PPCCCState.h
+++ b/llvm/lib/Target/PowerPC/PPCCCState.h
@@ -10,6 +10,7 @@
 #define PPCCCSTATE_H
 
 #include "PPCISelLowering.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 
@@ -36,6 +37,37 @@ class PPCCCState : public CCState {
   bool WasOriginalArgPPCF128(unsigned ValNo) { return OriginalArgWasPPCF128[ValNo]; }
   void clearWasPPCF128() { OriginalArgWasPPCF128.clear(); }
 };
-}
+
+class AIXCCState : public CCState {
+private:
+  BitVector IsFixed;
+
+public:
+  AIXCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
+             SmallVectorImpl<CCValAssign> &Locs, LLVMContext &C)
+      : CCState(CC, IsVarArg, MF, Locs, C) {}
+
+  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                              CCAssignFn Fn) {
+    // All formal arguments are fixed.
+    IsFixed.resize(Ins.size(), true);
+    CCState::AnalyzeFormalArguments(Ins, Fn);
+  }
+
+  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           CCAssignFn Fn) {
+    // Record whether the call operand was a fixed argument.
+    IsFixed.resize(Outs.size(), false);
+    for (unsigned ValNo = 0, E = Outs.size(); ValNo != E; ++ValNo)
+      if (Outs[ValNo].IsFixed)
+        IsFixed.set(ValNo);
+
+    CCState::AnalyzeCallOperands(Outs, Fn);
+  }
+
+  bool isFixed(unsigned ValNo) const { return IsFixed.test(ValNo); }
+};
+
+} // end namespace llvm
 
 #endif
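
The class added above boils down to a per-value-number fixedness record
layered on CCState. A simplified, standalone sketch of that bookkeeping
(stand-in types, not the LLVM classes):

    #include <cstddef>
    #include <vector>

    struct OutArg { bool IsFixed; };

    class FixednessTracker {
      std::vector<bool> Fixed;
    public:
      // Every formal argument of a function body is fixed by definition.
      void analyzeFormals(size_t NumIns) { Fixed.assign(NumIns, true); }

      // Call operands may include arguments passed through an ellipsis, so
      // record the per-operand IsFixed flag instead.
      void analyzeCallOperands(const std::vector<OutArg> &Outs) {
        Fixed.assign(Outs.size(), false);
        for (size_t ValNo = 0; ValNo != Outs.size(); ++ValNo)
          if (Outs[ValNo].IsFixed)
            Fixed[ValNo] = true;
      }

      bool isFixed(size_t ValNo) const { return Fixed[ValNo]; }
    };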

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9675c012fe0a..8b01d772285f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6264,10 +6264,43 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
 }
 
+// Returns true when the shadow of a general purpose argument register
+// in the parameter save area is aligned to at least 'RequiredAlign'.
+static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
+  assert(RequiredAlign.value() <= 16 &&
+         "Required alignment greater than stack alignment.");
+  switch (Reg) {
+  default:
+    report_fatal_error("called on invalid register.");
+  case PPC::R5:
+  case PPC::R9:
+  case PPC::X3:
+  case PPC::X5:
+  case PPC::X7:
+  case PPC::X9:
+    // These registers are 16 byte aligned which is the most strict alignment
+    // we can support.
+    return true;
+  case PPC::R3:
+  case PPC::R7:
+  case PPC::X4:
+  case PPC::X6:
+  case PPC::X8:
+  case PPC::X10:
+    // The shadow of these registers in the PSA is 8 byte aligned.
+    return RequiredAlign <= 8;
+  case PPC::R4:
+  case PPC::R6:
+  case PPC::R8:
+  case PPC::R10:
+    return RequiredAlign <= 4;
+  }
+}
+
 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
-                   CCState &State) {
-
+                   CCState &S) {
+  AIXCCState &State = static_cast<AIXCCState &>(S);
   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
       State.getMachineFunction().getSubtarget());
   const bool IsPPC64 = Subtarget.isPPC64();
@@ -6399,18 +6432,97 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
   case MVT::v2i64:
   case MVT::v2f64:
   case MVT::v1i128: {
-    if (State.isVarArg())
-      report_fatal_error(
-          "variadic arguments for vector types are unimplemented for AIX");
+    const unsigned VecSize = 16;
+    const Align VecAlign(VecSize);
+
+    if (!State.isVarArg()) {
+      // If there are vector registers remaining we don't consume any stack
+      // space.
+      if (unsigned VReg = State.AllocateReg(VR)) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+        return false;
+      }
+      // Vectors passed on the stack do not shadow GPRs or FPRs even though they
+      // might be allocated in the portion of the PSA that is shadowed by the
+      // GPRs.
+      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+      return false;
+    }
 
-    if (unsigned VReg = State.AllocateReg(VR)) {
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+    const unsigned PtrSize = IsPPC64 ? 8 : 4;
+    ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
+
+    unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
+    // Burn any underaligned registers and their shadowed stack space until
+    // we reach the required alignment.
+    while (NextRegIndex != GPRs.size() &&
+           !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
+      // Shadow allocate register and its stack shadow.
+      unsigned Reg = State.AllocateReg(GPRs);
+      State.AllocateStack(PtrSize, PtrAlign);
+      assert(Reg && "Allocating register unexpectedly failed.");
+      (void)Reg;
+      NextRegIndex = State.getFirstUnallocated(GPRs);
+    }
+
+    // Vectors that are passed as fixed arguments are handled differently.
+    // They are passed in VRs if any are available (unlike arguments passed
+    // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
+    // functions).
+    if (State.isFixed(ValNo)) {
+      if (unsigned VReg = State.AllocateReg(VR)) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+        // Shadow allocate GPRs and stack space even though we pass in a VR.
+        for (unsigned I = 0; I != VecSize; I += PtrSize)
+          State.AllocateReg(GPRs);
+        State.AllocateStack(VecSize, VecAlign);
+        return false;
+      }
+      // No vector registers remain so pass on the stack.
+      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
       return false;
     }
 
-    const unsigned VecSize = 16;
-    const unsigned Offset = State.AllocateStack(VecSize, Align(VecSize));
-    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    // If all GPRs are consumed then we pass the argument fully on the stack.
+    if (NextRegIndex == GPRs.size()) {
+      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+      return false;
+    }
+
+    // Corner case for 32-bit codegen. We have 2 registers to pass the first
+    // half of the argument, and then need to pass the remaining half on the
+    // stack.
+    if (GPRs[NextRegIndex] == PPC::R9) {
+      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+      State.addLoc(
+          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+      const unsigned FirstReg = State.AllocateReg(PPC::R9);
+      const unsigned SecondReg = State.AllocateReg(PPC::R10);
+      assert(FirstReg && SecondReg &&
+             "Allocating R9 or R10 unexpectedly failed.");
+      State.addLoc(
+          CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
+      State.addLoc(
+          CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
+      return false;
+    }
+
+    // We have enough GPRs to fully pass the vector argument, and we have
+    // already consumed any underaligned registers. Start with the custom
+    // MemLoc and then the custom RegLocs.
+    const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+    State.addLoc(
+        CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    for (unsigned I = 0; I != VecSize; I += PtrSize) {
+      const unsigned Reg = State.AllocateReg(GPRs);
+      assert(Reg && "Failed to allocated register for vararg vector argument");
+      State.addLoc(
+          CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+    }
     return false;
   }
   }
@@ -6544,7 +6656,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
-  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
 
   const EVT PtrVT = getPointerTy(MF.getDataLayout());
   // Reserve space for the linkage area on the stack.
@@ -6557,6 +6669,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
     CCValAssign &VA = ArgLocs[I++];
     MVT LocVT = VA.getLocVT();
+    MVT ValVT = VA.getValVT();
     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
     // For compatibility with the AIX XL compiler, the float args in the
     // parameter save area are initialized even if the argument is available
@@ -6564,8 +6677,69 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
     // and memory, however, the callee can choose to expect it in either.
     // The memloc is dismissed here because the argument is retrieved from
     // the register.
-    if (VA.isMemLoc() && VA.needsCustom())
+    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
+      continue;
+
+    auto HandleMemLoc = [&]() {
+      const unsigned LocSize = LocVT.getStoreSize();
+      const unsigned ValSize = ValVT.getStoreSize();
+      assert((ValSize <= LocSize) &&
+             "Object size is larger than size of MemLoc");
+      int CurArgOffset = VA.getLocMemOffset();
+      // Objects are right-justified because AIX is big-endian.
+      if (LocSize > ValSize)
+        CurArgOffset += LocSize - ValSize;
+      // Potential tail calls could cause overwriting of argument stack slots.
+      const bool IsImmutable =
+          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+            (CallConv == CallingConv::Fast));
+      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      SDValue ArgValue =
+          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
+      InVals.push_back(ArgValue);
+    };
+
+    // Vector arguments to VaArg functions are passed both on the stack, and
+    // in any available GPRs. Load the value from the stack and add the GPRs
+    // as live ins.
+    if (VA.isMemLoc() && VA.needsCustom()) {
+      assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
+      assert(isVarArg && "Only use custom memloc for vararg.");
+      // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
+      // matching custom RegLocs.
+      const unsigned OriginalValNo = VA.getValNo();
+
+      auto HandleCustomVecRegLoc = [&]() {
+        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
+               "Missing custom RegLoc.");
+        VA = ArgLocs[I++];
+        assert(VA.getValVT().isVector() &&
+               "Unexpected Val type for custom RegLoc.");
+        assert(VA.getValNo() == OriginalValNo &&
+               "ValNo mismatch between custom MemLoc and RegLoc.");
+        MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
+        MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
+      };
+
+      HandleMemLoc();
+      // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
+      // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
+      // R10.
+      HandleCustomVecRegLoc();
+      HandleCustomVecRegLoc();
+
+      // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
+      // we passed the vector in R5, R6, R7 and R8.
+      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
+        assert(!IsPPC64 &&
+               "Only 2 custom RegLocs expected for 64-bit codegen.");
+        HandleCustomVecRegLoc();
+        HandleCustomVecRegLoc();
+      }
+
       continue;
+    }
 
     if (VA.isRegLoc()) {
       if (VA.getValVT().isScalarInteger())
@@ -6654,9 +6828,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       continue;
     }
 
-    EVT ValVT = VA.getValVT();
     if (VA.isRegLoc() && !VA.needsCustom()) {
-      MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
+      MVT::SimpleValueType SVT = ValVT.SimpleTy;
       unsigned VReg =
           MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
@@ -6669,23 +6842,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       continue;
     }
     if (VA.isMemLoc()) {
-      const unsigned LocSize = LocVT.getStoreSize();
-      const unsigned ValSize = ValVT.getStoreSize();
-      assert((ValSize <= LocSize) &&
-             "Object size is larger than size of MemLoc");
-      int CurArgOffset = VA.getLocMemOffset();
-      // Objects are right-justified because AIX is big-endian.
-      if (LocSize > ValSize)
-        CurArgOffset += LocSize - ValSize;
-      // Potential tail calls could cause overwriting of argument stack slots.
-      const bool IsImmutable =
-          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
-            (CallConv == CallingConv::Fast));
-      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
-      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      SDValue ArgValue =
-          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
-      InVals.push_back(ArgValue);
+      HandleMemLoc();
       continue;
     }
   }
@@ -6766,8 +6923,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
 
   MachineFunction &MF = DAG.getMachineFunction();
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
-                 *DAG.getContext());
+  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
+                    *DAG.getContext());
 
   // Reserve space for the linkage save area (LSA) on the stack.
   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
@@ -6936,11 +7093,15 @@ SDValue PPCTargetLowering::LowerCall_AIX(
       continue;
     }
 
+    if (!ValVT.isFloatingPoint())
+      report_fatal_error(
+          "Unexpected register handling for calling convention.");
+
     // Custom handling is used for GPR initializations for vararg float
     // arguments.
     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
-           ValVT.isFloatingPoint() && LocVT.isInteger() &&
-           "Unexpected register handling for calling convention.");
+           LocVT.isInteger() &&
+           "Custom register handling only expected for VarArg.");
 
     SDValue ArgAsInt =
         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
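
A quick way to see why isGPRShadowAligned hard-codes those register sets: the
parameter save area starts right after the linkage area (6 reserved slots, so
24 bytes on AIX32 and 48 bytes on AIX64), which places the shadow of each GPR
at LinkageSize + Index * PtrSize. A throwaway check of the alignments the
switch encodes (a sketch; the offsets follow from those slot sizes, not from
the patch itself):

    #include <cstdio>

    int main() {
      // 32-bit: R3..R10 shadow 4-byte slots starting at offset 24.
      for (unsigned I = 0; I < 8; ++I) {
        unsigned Off = 24 + 4 * I;
        unsigned Al = Off % 16 == 0 ? 16 : Off % 8 == 0 ? 8 : 4;
        std::printf("R%u shadow at %u -> %u-byte aligned\n", 3 + I, Off, Al);
      }
      // 64-bit: X3..X10 shadow 8-byte slots starting at offset 48.
      for (unsigned I = 0; I < 8; ++I) {
        unsigned Off = 48 + 8 * I;
        unsigned Al = Off % 16 == 0 ? 16 : 8;
        std::printf("X%u shadow at %u -> %u-byte aligned\n", 3 + I, Off, Al);
      }
      return 0;
    }

Running it reproduces the three buckets in the switch: R5/R9 and the odd X
registers are 16-byte aligned, R3/R7 and the even X registers are 8-byte
aligned, and the remaining 32-bit registers are only 4-byte aligned.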

diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll
new file mode 100644
index 000000000000..bd388e1a8a4f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7  -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s
+
+;; Testing a variadic callee where a vector argument passed through ellipsis
+;; is passed partially in registers and on the stack. The 3 fixed double
+;; arguments shadow r3-r8, and a vector int <4 x i32> is passed in R9/R10 and
+;; on the stack starting at the shadow of R9.
+define <4 x i32> @split_spill(double %d1, double %d2, double %d3, ...) {
+  ; CHECK-LABEL: name: split_spill
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $r9, $r10
+  ; CHECK:   [[COPY:%[0-9]+]]:gprc = COPY $r10
+  ; CHECK:   [[COPY1:%[0-9]+]]:gprc = COPY $r9
+  ; CHECK:   STW [[COPY1]], 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 16)
+  ; CHECK:   STW [[COPY]], 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4)
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0
+  ; CHECK:   [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[ADDI]] :: (load 16 from %ir.4)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $v2 = COPY [[LXVW4X]]
+  ; CHECK:   BLR implicit $lr, implicit $rm, implicit $v2
+entry:
+  %arg_list = alloca i8*, align 4
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 4
+  %1 = ptrtoint i8* %argp.cur to i32
+  %2 = add i32 %1, 15
+  %3 = and i32 %2, -16
+  %argp.cur.aligned = inttoptr i32 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 16
+  store i8* %argp.next, i8** %arg_list, align 4
+  %4 = inttoptr i32 %3 to <4 x i32>*
+  %5 = load <4 x i32>, <4 x i32>* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret <4 x i32> %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)

diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll
new file mode 100644
index 000000000000..aac50e524bb4
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7  -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s
+
+; Testing passing a vector <4 x i32> through ellipses of a variadic function.
+define <4 x i32> @callee(i32 %count, ...) {
+  ; CHECK-LABEL: name: callee
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $r4, $r5, $r6, $r7, $r8, $r9, $r10
+  ; CHECK:   [[COPY:%[0-9]+]]:gprc = COPY $r10
+  ; CHECK:   [[COPY1:%[0-9]+]]:gprc = COPY $r9
+  ; CHECK:   [[COPY2:%[0-9]+]]:gprc = COPY $r8
+  ; CHECK:   [[COPY3:%[0-9]+]]:gprc = COPY $r7
+  ; CHECK:   [[COPY4:%[0-9]+]]:gprc = COPY $r6
+  ; CHECK:   [[COPY5:%[0-9]+]]:gprc = COPY $r5
+  ; CHECK:   [[COPY6:%[0-9]+]]:gprc = COPY $r4
+  ; CHECK:   STW [[COPY6]], 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0)
+  ; CHECK:   STW [[COPY5]], 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4)
+  ; CHECK:   STW [[COPY4]], 8, %fixed-stack.0 :: (store 4)
+  ; CHECK:   STW [[COPY3]], 12, %fixed-stack.0 :: (store 4)
+  ; CHECK:   STW [[COPY2]], 16, %fixed-stack.0 :: (store 4)
+  ; CHECK:   STW [[COPY1]], 20, %fixed-stack.0 :: (store 4)
+  ; CHECK:   STW [[COPY]], 24, %fixed-stack.0 :: (store 4)
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0
+  ; CHECK:   STW killed [[ADDI]], 0, %stack.0.arg_list :: (store 4 into %ir.0)
+  ; CHECK:   [[ADDI1:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 15
+  ; CHECK:   [[RLWINM:%[0-9]+]]:gprc = RLWINM killed [[ADDI1]], 0, 0, 27
+  ; CHECK:   [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[RLWINM]] :: (load 16 from %ir.4)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $v2 = COPY [[LXVW4X]]
+  ; CHECK:   BLR implicit $lr, implicit $rm, implicit $v2
+entry:
+  %arg_list = alloca i8*, align 4
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 4
+  %1 = ptrtoint i8* %argp.cur to i32
+  %2 = add i32 %1, 15
+  %3 = and i32 %2, -16
+  %argp.cur.aligned = inttoptr i32 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 16
+  store i8* %argp.next, i8** %arg_list, align 4
+  %4 = inttoptr i32 %3 to <4 x i32>*
+  %5 = load <4 x i32>, <4 x i32>* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret <4 x i32> %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+

diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll
new file mode 100644
index 000000000000..d73efb8393ea
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll
@@ -0,0 +1,13 @@
+; RUN: not --crash llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7  -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | \
+; RUN: FileCheck %s
+
+define void @caller() {
+entry:
+  %call = tail call <4 x i32> (double, double, double, ...) @split_spill(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+  ret void
+}
+
+declare <4 x i32> @split_spill(double, double, double, ...)
+
+; CHECK: ERROR: Unexpected register handling for calling convention.

diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll
new file mode 100644
index 000000000000..aad417775336
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7  -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s
+
+;; Fixed vector arguments to variadic functions are passed differently than
+;; either arguments to non-variadic functions or arguments passed through
+;; ellipses.
+define double @callee(i32 %count, <4 x i32> %vsi, double %next, ...) {
+  ; CHECK-LABEL: name: callee
+  ; CHECK: bb.0.entry:
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0
+  ; CHECK:   STW killed [[ADDI]], 0, %stack.0.arg_list :: (store 4 into %ir.0)
+  ; CHECK:   [[ADDI1:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 15
+  ; CHECK:   [[RLWINM:%[0-9]+]]:gprc_and_gprc_nor0 = RLWINM killed [[ADDI1]], 0, 0, 27
+  ; CHECK:   [[ADDI2:%[0-9]+]]:gprc = nuw ADDI killed [[RLWINM]], 16
+  ; CHECK:   [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 $zero, killed [[ADDI2]] :: (load 8 from %ir.4, align 16)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $f1 = COPY [[XFLOADf64_]]
+  ; CHECK:   BLR implicit $lr, implicit $rm, implicit $f1
+entry:
+  %arg_list = alloca i8*, align 4
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 4
+  %1 = ptrtoint i8* %argp.cur to i32
+  %2 = add i32 %1, 15
+  %3 = and i32 %2, -16
+  %argp.cur.aligned = inttoptr i32 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 16
+  %argp.next3 = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 24
+  store i8* %argp.next3, i8** %arg_list, align 4
+  %4 = bitcast i8* %argp.next to double*
+  %5 = load double, double* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret double %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+

diff --git a/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll
new file mode 100644
index 000000000000..056bb90c6813
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7  -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s
+
+; Testing passing a vector <4 x i32> through ellipses of a variadic function.
+define <4 x i32> @callee(i32 signext %count, ...) {
+  ; CHECK-LABEL: name: callee
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $x4, $x5, $x6, $x7, $x8, $x9, $x10
+  ; CHECK:   [[COPY:%[0-9]+]]:g8rc = COPY $x10
+  ; CHECK:   [[COPY1:%[0-9]+]]:g8rc = COPY $x9
+  ; CHECK:   [[COPY2:%[0-9]+]]:g8rc = COPY $x8
+  ; CHECK:   [[COPY3:%[0-9]+]]:g8rc = COPY $x7
+  ; CHECK:   [[COPY4:%[0-9]+]]:g8rc = COPY $x6
+  ; CHECK:   [[COPY5:%[0-9]+]]:g8rc = COPY $x5
+  ; CHECK:   [[COPY6:%[0-9]+]]:g8rc = COPY $x4
+  ; CHECK:   STD [[COPY6]], 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0)
+  ; CHECK:   STD [[COPY5]], 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8)
+  ; CHECK:   STD [[COPY4]], 16, %fixed-stack.0 :: (store 8)
+  ; CHECK:   STD [[COPY3]], 24, %fixed-stack.0 :: (store 8)
+  ; CHECK:   STD [[COPY2]], 32, %fixed-stack.0 :: (store 8)
+  ; CHECK:   STD [[COPY1]], 40, %fixed-stack.0 :: (store 8)
+  ; CHECK:   STD [[COPY]], 48, %fixed-stack.0 :: (store 8)
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 0
+  ; CHECK:   STD killed [[ADDI8_]], 0, %stack.0.arg_list :: (store 8 into %ir.0)
+  ; CHECK:   [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 15
+  ; CHECK:   [[RLDICR:%[0-9]+]]:g8rc = RLDICR killed [[ADDI8_1]], 0, 59
+  ; CHECK:   [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[RLDICR]] :: (load 16 from %ir.4)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $v2 = COPY [[LXVW4X]]
+  ; CHECK:   BLR8 implicit $lr8, implicit $rm, implicit $v2
+entry:
+  %arg_list = alloca i8*, align 8
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 8
+  %1 = ptrtoint i8* %argp.cur to i64
+  %2 = add i64 %1, 15
+  %3 = and i64 %2, -16
+  %argp.cur.aligned = inttoptr i64 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i64 16
+  store i8* %argp.next, i8** %arg_list, align 8
+  %4 = inttoptr i64 %3 to <4 x i32>*
+  %5 = load <4 x i32>, <4 x i32>* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret <4 x i32> %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+

diff --git a/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll
new file mode 100644
index 000000000000..a1da0b099340
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7  -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s
+
+;; Fixed vector arguments to variadic functions are passed differently than
+;; either arguments to non-variadic functions or arguments passed through
+;; ellipses.
+define double @callee(i32 signext %count, <4 x i32> %vsi, double %next, ...) {
+  ; CHECK-LABEL: name: callee
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $x8, $x9, $x10
+  ; CHECK:   [[COPY:%[0-9]+]]:g8rc = COPY $x10
+  ; CHECK:   [[COPY1:%[0-9]+]]:g8rc = COPY $x9
+  ; CHECK:   [[COPY2:%[0-9]+]]:g8rc = COPY $x8
+  ; CHECK:   STD [[COPY2]], 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0)
+  ; CHECK:   STD [[COPY1]], 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8)
+  ; CHECK:   STD [[COPY]], 16, %fixed-stack.0 :: (store 8)
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 0
+  ; CHECK:   STD killed [[ADDI8_]], 0, %stack.0.arg_list :: (store 8 into %ir.0)
+  ; CHECK:   [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 15
+  ; CHECK:   [[RLDICR:%[0-9]+]]:g8rc_and_g8rc_nox0 = RLDICR killed [[ADDI8_1]], 0, 59
+  ; CHECK:   [[LI8_:%[0-9]+]]:g8rc = LI8 16
+  ; CHECK:   [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 killed [[RLDICR]], killed [[LI8_]] :: (load 8 from %ir.4, align 16)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $f1 = COPY [[XFLOADf64_]]
+  ; CHECK:   BLR8 implicit $lr8, implicit $rm, implicit $f1
+entry:
+  %arg_list = alloca i8*, align 8
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 8
+  %1 = ptrtoint i8* %argp.cur to i64
+  %2 = add i64 %1, 15
+  %3 = and i64 %2, -16
+  %argp.cur.aligned = inttoptr i64 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i64 16
+  %argp.next3 = getelementptr inbounds i8, i8* %argp.cur.aligned, i64 24
+  store i8* %argp.next3, i8** %arg_list, align 8
+  %4 = bitcast i8* %argp.next to double*
+  %5 = load double, double* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret double %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)

