[llvm] r305083 - Reland "[SelectionDAG] Enable target specific vector scalarization of calls and returns"

Simon Dardis via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 9 07:37:08 PDT 2017


Author: sdardis
Date: Fri Jun  9 09:37:08 2017
New Revision: 305083

URL: http://llvm.org/viewvc/llvm-project?rev=305083&view=rev
Log:
Reland "[SelectionDAG] Enable target specific vector scalarization of calls and returns"

By adding calling-convention-aware target hooks for getRegisterType,
getNumRegisters and getVectorTypeBreakdown, backends can request that LLVM
scalarize vector types for calls and returns.

The MIPS vector ABI requires that vector arguments and returns are passed in
integer registers. With SelectionDAG's new hooks, the MIPS backend can now
handle LLVM IR with vector types in calls and returns, e.g.
'call @foo(<4 x i32> %4)'.
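
For illustration, a minimal IR module of the kind the backend can now lower
directly (function and value names are purely illustrative):

  declare <4 x i32> @foo(<4 x i32>)

  define <4 x i32> @bar(<4 x i32> %v) {
    %r = call <4 x i32> @foo(<4 x i32> %v)
    ret <4 x i32> %r
  }

Under O32, the 128-bit argument is split into four i32 parts passed in $4-$7
rather than being forced into a single integer register.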

Previously, such calls and returns were scalarized for the MIPS O32/N32/N64
ABIs only if vector types were not legal. If vector types were legal, a single
128-bit vector argument would be assigned to a single 32-bit / 64-bit integer
register.

By teaching the MIPS backend to inspect the original types, it can now
implement the MIPS vector ABI which requires a particular method of
scalarizing vectors.
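
As a worked example of the register breakdown the new MIPS hooks perform
(numbers follow this patch's implementation; the declaration is illustrative):

  declare void @take128(<4 x i32>)
  ; O32:     the 128-bit argument becomes 4 x i32 parts (128 / 32 = 4)
  ; N32/N64: the 128-bit argument becomes 2 x i64 parts (128 / 64 = 2)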

Previously, the MIPS backend relied on clang to scalarize such calls, turning
"call @foo(<4 x float> %a)" into "call @foo(i32 inreg %1, i32 inreg %2,
i32 inreg %3, i32 inreg %4)".

This patch enables the MIPS backend to take either form for vector types.
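
For reference, the two forms side by side (illustrative declarations; both
should now receive the same O32 argument assignment):

  ; Vector form, now handled directly by the backend:
  declare void @foo_vector(<4 x float>)

  ; Equivalent pre-scalarized form that clang previously emitted:
  declare void @foo_scalarized(i32 inreg, i32 inreg, i32 inreg, i32 inreg)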

The previous version of this patch was reverted because it triggered a
"conditional move or jump depends on uninitialized value" error.

Reviewers: zoran.jovanovic, jaydeep, vkalintiris, slthakur

Differential Revision: https://reviews.llvm.org/D27845

Added:
    llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
Modified:
    llvm/trunk/include/llvm/Target/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
    llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
    llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
    llvm/trunk/lib/Target/Mips/MipsCCState.cpp
    llvm/trunk/lib/Target/Mips/MipsCCState.h
    llvm/trunk/lib/Target/Mips/MipsCallingConv.td
    llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
    llvm/trunk/lib/Target/Mips/MipsISelLowering.h
    llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp
    llvm/trunk/test/CodeGen/Mips/ctlz-v.ll
    llvm/trunk/test/CodeGen/Mips/cttz-v.ll
    llvm/trunk/test/CodeGen/Mips/return-vector.ll

Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri Jun  9 09:37:08 2017
@@ -677,6 +677,16 @@ public:
                                   unsigned &NumIntermediates,
                                   MVT &RegisterVT) const;
 
+  /// Certain targets such as MIPS require that some types such as vectors are
+  /// always broken down into scalars in some contexts. This occurs even if the
+  /// vector type is legal.
+  virtual unsigned getVectorTypeBreakdownForCallingConv(
+      LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+      unsigned &NumIntermediates, MVT &RegisterVT) const {
+    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
+                                  RegisterVT);
+  }
+
   struct IntrinsicInfo {
     unsigned     opc = 0;          // target opcode
     EVT          memVT;            // memory VT
@@ -1085,6 +1095,33 @@ public:
     llvm_unreachable("Unsupported extended type!");
   }
 
+  /// Certain combinations of ABIs, targets and features require that types
+  /// are legal for some operations and not for others.
+  /// For MIPS, all vector types must be passed through the integer register set.
+  virtual MVT getRegisterTypeForCallingConv(MVT VT) const {
+    return getRegisterType(VT);
+  }
+
+  virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+                                            EVT VT) const {
+    return getRegisterType(Context, VT);
+  }
+
+  /// Certain targets require unusual breakdowns of certain types. For MIPS,
+  /// this occurs when a vector type is used, as vectors are passed through the
+  /// integer register set.
+  virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                                 EVT VT) const {
+    return getNumRegisters(Context, VT);
+  }
+
+  /// Certain targets have context-sensitive alignment requirements, where one
+  /// type has the alignment requirement of another type.
+  virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
+                                                 DataLayout DL) const {
+    return DL.getABITypeAlignment(ArgTy);
+  }
+
   /// If true, then instruction selection should seek to shrink the FP constant
   /// of the specified type to a smaller type in order to save space and / or
   /// reduce runtime.

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Fri Jun  9 09:37:08 2017
@@ -101,7 +101,8 @@ static const unsigned MaxParallelChains
 
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
-                                      MVT PartVT, EVT ValueVT, const Value *V);
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      bool IsABIRegCopy);
 
 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent.  If the parts combine to a type
@@ -111,10 +112,11 @@ static SDValue getCopyFromPartsVector(Se
 static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                 const SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, EVT ValueVT, const Value *V,
-                                Optional<ISD::NodeType> AssertOp = None) {
+                                Optional<ISD::NodeType> AssertOp = None,
+                                bool IsABIRegCopy = false) {
   if (ValueVT.isVector())
     return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
-                                  PartVT, ValueVT, V);
+                                  PartVT, ValueVT, V, IsABIRegCopy);
 
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -258,7 +260,8 @@ static void diagnosePossiblyInvalidConst
 /// ValueVT (ISD::AssertSext).
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
-                                      MVT PartVT, EVT ValueVT, const Value *V) {
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      bool IsABIRegCopy) {
   assert(ValueVT.isVector() && "Not a vector value");
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -269,9 +272,18 @@ static SDValue getCopyFromPartsVector(Se
     EVT IntermediateVT;
     MVT RegisterVT;
     unsigned NumIntermediates;
-    unsigned NumRegs =
-    TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
-                               NumIntermediates, RegisterVT);
+    unsigned NumRegs;
+
+    if (IsABIRegCopy) {
+      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+          RegisterVT);
+    } else {
+      NumRegs =
+          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                     NumIntermediates, RegisterVT);
+    }
+
     assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
     NumParts = NumRegs; // Silence a compiler warning.
     assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
@@ -300,9 +312,14 @@ static SDValue getCopyFromPartsVector(Se
 
     // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
     // intermediate operands.
+    EVT BuiltVectorTy =
+        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
+                         (IntermediateVT.isVector()
+                              ? IntermediateVT.getVectorNumElements() * NumParts
+                              : NumIntermediates));
     Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                 : ISD::BUILD_VECTOR,
-                      DL, ValueVT, Ops);
+                      DL, BuiltVectorTy, Ops);
   }
 
   // There is now one part, held in Val.  Correct it to match ValueVT.
@@ -341,13 +358,29 @@ static SDValue getCopyFromPartsVector(Se
       TLI.isTypeLegal(ValueVT))
     return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
 
-  // Handle cases such as i8 -> <1 x i1>
   if (ValueVT.getVectorNumElements() != 1) {
-    diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
-                                      "non-trivial scalar-to-vector conversion");
-    return DAG.getUNDEF(ValueVT);
+     // Certain ABIs require that vectors are passed as integers. For vectors
+     // of the same size, this is an obvious bitcast.
+     if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
+       return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+     } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+       // Bitcast Val back the original type and extract the corresponding
+       // vector we want.
+       unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
+       EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
+                                           ValueVT.getVectorElementType(), Elts);
+       Val = DAG.getBitcast(WiderVecType, Val);
+       return DAG.getNode(
+           ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+           DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+     }
+
+     diagnosePossiblyInvalidConstraint(
+         *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
+     return DAG.getUNDEF(ValueVT);
   }
 
+  // Handle cases such as i8 -> <1 x i1>
   EVT ValueSVT = ValueVT.getVectorElementType();
   if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
     Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
@@ -358,7 +391,7 @@ static SDValue getCopyFromPartsVector(Se
 
 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 MVT PartVT, const Value *V);
+                                 MVT PartVT, const Value *V, bool IsABIRegCopy);
 
 /// getCopyToParts - Create a series of nodes that contain the specified value
 /// split into legal parts.  If the parts contain more bits than Val, then, for
@@ -366,12 +399,14 @@ static void getCopyToPartsVector(Selecti
 static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                            SDValue *Parts, unsigned NumParts, MVT PartVT,
                            const Value *V,
-                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
+                           bool IsABIRegCopy = false) {
   EVT ValueVT = Val.getValueType();
 
   // Handle the vector case separately.
   if (ValueVT.isVector())
-    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
+                                IsABIRegCopy);
 
   unsigned PartBits = PartVT.getSizeInBits();
   unsigned OrigNumParts = NumParts;
@@ -496,7 +531,9 @@ static void getCopyToParts(SelectionDAG
 /// value split into legal parts.
 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 MVT PartVT, const Value *V) {
+                                 MVT PartVT, const Value *V,
+                                 bool IsABIRegCopy) {
+
   EVT ValueVT = Val.getValueType();
   assert(ValueVT.isVector() && "Not a vector");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -537,13 +574,20 @@ static void getCopyToPartsVector(Selecti
 
       // Promoted vector extract
       Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
-    } else{
-      // Vector -> scalar conversion.
-      assert(ValueVT.getVectorNumElements() == 1 &&
-             "Only trivial vector-to-scalar conversions should get here!");
-      Val = DAG.getNode(
-          ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
-          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    } else {
+      if (ValueVT.getVectorNumElements() == 1) {
+        Val = DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+      } else {
+        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+               "lossy conversion of vector to scalar type");
+        EVT IntermediateType =
+            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+        Val = DAG.getBitcast(IntermediateType, Val);
+        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+      }
     }
 
     assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
@@ -555,15 +599,31 @@ static void getCopyToPartsVector(Selecti
   EVT IntermediateVT;
   MVT RegisterVT;
   unsigned NumIntermediates;
-  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
-                                                IntermediateVT,
-                                                NumIntermediates, RegisterVT);
+  unsigned NumRegs;
+  if (IsABIRegCopy) {
+    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+        RegisterVT);
+  } else {
+    NumRegs =
+        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                   NumIntermediates, RegisterVT);
+  }
   unsigned NumElements = ValueVT.getVectorNumElements();
 
   assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
   NumParts = NumRegs; // Silence a compiler warning.
   assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
 
+  // Convert the vector to the appropriate type if necessary.
+  unsigned DestVectorNoElts =
+      NumIntermediates *
+      (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+  EVT BuiltVectorTy = EVT::getVectorVT(
+      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+  if (Val.getValueType() != BuiltVectorTy)
+    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+
   // Split the vector into intermediate operands.
   SmallVector<SDValue, 8> Ops(NumIntermediates);
   for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -596,22 +656,31 @@ static void getCopyToPartsVector(Selecti
   }
 }
 
-RegsForValue::RegsForValue() {}
+RegsForValue::RegsForValue() { IsABIMangled = false; }
 
 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
-                           EVT valuevt)
-    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+                           EVT valuevt, bool IsABIMangledValue)
+    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
 
 RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
-                           const DataLayout &DL, unsigned Reg, Type *Ty) {
+                           const DataLayout &DL, unsigned Reg, Type *Ty,
+                           bool IsABIMangledValue) {
   ComputeValueVTs(TLI, DL, Ty, ValueVTs);
 
+  IsABIMangled = IsABIMangledValue;
+
   for (EVT ValueVT : ValueVTs) {
-    unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
-    MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
+    unsigned NumRegs = IsABIMangledValue
+                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
+                           : TLI.getNumRegisters(Context, ValueVT);
+    MVT RegisterVT = IsABIMangledValue
+                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
+                         : TLI.getRegisterType(Context, ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i)
       Regs.push_back(Reg + i);
     RegVTs.push_back(RegisterVT);
+    RegCount.push_back(NumRegs);
     Reg += NumRegs;
   }
 }
@@ -632,8 +701,10 @@ SDValue RegsForValue::getCopyFromRegs(Se
   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
     // Copy the legal parts from the registers.
     EVT ValueVT = ValueVTs[Value];
-    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
-    MVT RegisterVT = RegVTs[Value];
+    unsigned NumRegs = RegCount[Value];
+    MVT RegisterVT = IsABIMangled
+                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+                         : RegVTs[Value];
 
     Parts.resize(NumRegs);
     for (unsigned i = 0; i != NumRegs; ++i) {
@@ -728,9 +799,11 @@ void RegsForValue::getCopyToRegs(SDValue
   unsigned NumRegs = Regs.size();
   SmallVector<SDValue, 8> Parts(NumRegs);
   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    EVT ValueVT = ValueVTs[Value];
-    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
-    MVT RegisterVT = RegVTs[Value];
+    unsigned NumParts = RegCount[Value];
+
+    MVT RegisterVT = IsABIMangled
+                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+                         : RegVTs[Value];
 
     if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
       ExtendKind = ISD::ZERO_EXTEND;
@@ -953,10 +1026,16 @@ SDValue SelectionDAGBuilder::getCopyFrom
 
   if (It != FuncInfo.ValueMap.end()) {
     unsigned InReg = It->second;
+    bool IsABIRegCopy =
+        V && ((isa<CallInst>(V) &&
+               !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+              isa<ReturnInst>(V));
+
     RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
-                     DAG.getDataLayout(), InReg, Ty);
+                     DAG.getDataLayout(), InReg, Ty, IsABIRegCopy);
     SDValue Chain = DAG.getEntryNode();
-    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+                                 V);
     resolveDanglingDebugInfo(V, Result);
   }
 
@@ -1142,8 +1221,13 @@ SDValue SelectionDAGBuilder::getValueImp
   // If this is an instruction which fast-isel has deferred, select it now.
   if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
     unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+    bool IsABIRegCopy =
+        V && ((isa<CallInst>(V) &&
+               !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+              isa<ReturnInst>(V));
+
     RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
-                     Inst->getType());
+                     Inst->getType(), IsABIRegCopy);
     SDValue Chain = DAG.getEntryNode();
     return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
   }
@@ -1371,12 +1455,12 @@ void SelectionDAGBuilder::visitRet(const
         if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
           VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
 
-        unsigned NumParts = TLI.getNumRegisters(Context, VT);
-        MVT PartVT = TLI.getRegisterType(Context, VT);
+        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
+        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
         SmallVector<SDValue, 4> Parts(NumParts);
         getCopyToParts(DAG, getCurSDLoc(),
                        SDValue(RetOp.getNode(), RetOp.getResNo() + j),
-                       &Parts[0], NumParts, PartVT, &I, ExtendKind);
+                       &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
 
         // 'inreg' on function refers to return value
         ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -7112,8 +7196,8 @@ void SelectionDAGBuilder::visitInlineAsm
 
           SDLoc dl = getCurSDLoc();
           // Use the produced MatchedRegs object to
-          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
-                                    Chain, &Flag, CS.getInstruction());
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+                                    CS.getInstruction());
           MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                            true, OpInfo.getMatchedOperand(), dl,
                                            DAG, AsmNodeOperands);
@@ -7799,8 +7883,10 @@ TargetLowering::LowerCallTo(TargetLoweri
   } else {
     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
       EVT VT = RetTys[I];
-      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT RegisterVT =
+          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
         ISD::InputArg MyFlags;
         MyFlags.VT = RegisterVT;
@@ -7849,7 +7935,11 @@ TargetLowering::LowerCallTo(TargetLoweri
       SDValue Op = SDValue(Args[i].Node.getNode(),
                            Args[i].Node.getResNo() + Value);
       ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+      // Certain targets (such as MIPS) may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
 
       if (Args[i].IsZExt)
         Flags.setZExt();
@@ -7904,8 +7994,9 @@ TargetLowering::LowerCallTo(TargetLoweri
         Flags.setInConsecutiveRegs();
       Flags.setOrigAlign(OriginalAlignment);
 
-      MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumParts =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
       SmallVector<SDValue, 4> Parts(NumParts);
       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 
@@ -7935,7 +8026,8 @@ TargetLowering::LowerCallTo(TargetLoweri
       }
 
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
-                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);
+                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
+                     true);
 
       for (unsigned j = 0; j != NumParts; ++j) {
         // if it isn't first piece, alignment must be 1
@@ -8035,12 +8127,14 @@ TargetLowering::LowerCallTo(TargetLoweri
     unsigned CurReg = 0;
     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
       EVT VT = RetTys[I];
-      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT RegisterVT =
+          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
 
       ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
                                               NumRegs, RegisterVT, VT, nullptr,
-                                              AssertOp));
+                                              AssertOp, true));
       CurReg += NumRegs;
     }
 
@@ -8076,8 +8170,15 @@ SelectionDAGBuilder::CopyValueToVirtualR
   assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // If this is an InlineAsm we have to match the registers required, not the
+  // notional registers required by the type.
+  bool IsABIRegCopy =
+    V && ((isa<CallInst>(V) &&
+           !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+          isa<ReturnInst>(V));
+
   RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
-                   V->getType());
+                   V->getType(), IsABIRegCopy);
   SDValue Chain = DAG.getEntryNode();
 
   ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -8319,7 +8420,12 @@ void SelectionDAGISel::LowerArguments(co
       EVT VT = ValueVTs[Value];
       Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
       ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+      // Certain targets (such as MIPS) may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      unsigned OriginalAlignment =
+          TLI->getABIAlignmentForCallingConv(ArgTy, DL);
 
       if (Arg.hasAttribute(Attribute::ZExt))
         Flags.setZExt();
@@ -8381,8 +8487,10 @@ void SelectionDAGISel::LowerArguments(co
       if (ArgCopyElisionCandidates.count(&Arg))
         Flags.setCopyElisionCandidate();
 
-      MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+      MVT RegisterVT =
+          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+      unsigned NumRegs =
+          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
         ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
                               ArgNo, PartBase+i*RegisterVT.getStoreSize());
@@ -8486,8 +8594,10 @@ void SelectionDAGISel::LowerArguments(co
 
     for (unsigned Val = 0; Val != NumValues; ++Val) {
       EVT VT = ValueVTs[Val];
-      MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+      MVT PartVT =
+          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+      unsigned NumParts =
+          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
 
       // Even an apparant 'unused' swifterror argument needs to be returned. So
       // we do generate a copy for it that can be used on return from the
@@ -8500,7 +8610,8 @@ void SelectionDAGISel::LowerArguments(co
           AssertOp = ISD::AssertZext;
 
         ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
-                                             PartVT, VT, nullptr, AssertOp));
+                                             PartVT, VT, nullptr, AssertOp,
+                                             true));
       }
 
       i += NumParts;

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Fri Jun  9 09:37:08 2017
@@ -975,18 +975,28 @@ struct RegsForValue {
   /// expanded value requires multiple registers.
   SmallVector<unsigned, 4> Regs;
 
+  /// This list holds the number of registers for each value.
+  SmallVector<unsigned, 4> RegCount;
+
+  /// Records if this value needs to be treated in an ABI dependent manner,
+  /// different from normal type legalization.
+  bool IsABIMangled;
+
   RegsForValue();
 
-  RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+  RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+               bool IsABIMangledValue = false);
 
   RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
-               const DataLayout &DL, unsigned Reg, Type *Ty);
+               const DataLayout &DL, unsigned Reg, Type *Ty,
+               bool IsABIMangledValue = false);
 
   /// Add the specified values to this one.
   void append(const RegsForValue &RHS) {
     ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
     RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
     Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    RegCount.push_back(RHS.Regs.size());
   }
 
   /// Emit a series of CopyFromReg nodes that copies from this value and returns

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp Fri Jun  9 09:37:08 2017
@@ -840,7 +840,7 @@ SelectionDAGBuilder::LowerStatepoint(Imm
       //       completely and make statepoint call to return a tuple.
       unsigned Reg = FuncInfo.CreateRegs(RetTy);
       RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
-                       DAG.getDataLayout(), Reg, RetTy);
+                       DAG.getDataLayout(), Reg, RetTy, true);
       SDValue Chain = DAG.getEntryNode();
 
       RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);

Modified: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp (original)
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp Fri Jun  9 09:37:08 2017
@@ -1637,8 +1637,10 @@ void llvm::GetReturnInfo(Type *ReturnTyp
         VT = MinVT;
     }
 
-    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
-    MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+    unsigned NumParts =
+        TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT);
+    MVT PartVT =
+        TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT);
 
     // 'inreg' on function refers to return value
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();

Modified: llvm/trunk/lib/Target/Mips/MipsCCState.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsCCState.cpp?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsCCState.cpp (original)
+++ llvm/trunk/lib/Target/Mips/MipsCCState.cpp Fri Jun  9 09:37:08 2017
@@ -51,6 +51,22 @@ static bool originalTypeIsF128(const Typ
   return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func));
 }
 
+/// Return true if the original type was vXfXX.
+static bool originalEVTTypeIsVectorFloat(EVT Ty) {
+  if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint())
+    return true;
+
+  return false;
+}
+
+/// Return true if the original type was a vector of floats (vXfXX).
+static bool originalTypeIsVectorFloat(const Type * Ty) {
+  if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy())
+    return true;
+
+  return false;
+}
+
 MipsCCState::SpecialCallingConvType
 MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
                                             const MipsSubtarget &Subtarget) {
@@ -78,8 +94,8 @@ void MipsCCState::PreAnalyzeCallResultFo
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
-/// this for use by RetCC_MipsN.
+/// Identify lowered values that originated from f128 or float arguments and
+/// record this for use by RetCC_MipsN.
 void MipsCCState::PreAnalyzeReturnForF128(
     const SmallVectorImpl<ISD::OutputArg> &Outs) {
   const MachineFunction &MF = getMachineFunction();
@@ -91,23 +107,44 @@ void MipsCCState::PreAnalyzeReturnForF12
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
+/// Identify lowered values that originated from vXfXX and record
+/// this.
+void MipsCCState::PreAnalyzeCallResultForVectorFloat(
+    const SmallVectorImpl<ISD::InputArg> &Ins, const Type *RetTy) {
+  for (unsigned i = 0; i < Ins.size(); ++i) {
+    OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy));
+  }
+}
+
+/// Identify lowered values that originated from vXfXX arguments and record
 /// this.
+void MipsCCState::PreAnalyzeReturnForVectorFloat(
+    const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  for (unsigned i = 0; i < Outs.size(); ++i) {
+    ISD::OutputArg Out = Outs[i];
+    OriginalRetWasFloatVector.push_back(
+        originalEVTTypeIsVectorFloat(Out.ArgVT));
+  }
+}
+
+/// Identify lowered values that originated from f128, float and sret to vXfXX
+/// arguments and record this.
 void MipsCCState::PreAnalyzeCallOperands(
     const SmallVectorImpl<ISD::OutputArg> &Outs,
     std::vector<TargetLowering::ArgListEntry> &FuncArgs,
     const char *Func) {
   for (unsigned i = 0; i < Outs.size(); ++i) {
-    OriginalArgWasF128.push_back(
-        originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func));
-    OriginalArgWasFloat.push_back(
-        FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+    TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex];
+
+    OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func));
+    OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy());
+    OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy());
     CallOperandIsFixed.push_back(Outs[i].IsFixed);
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
-/// this.
+/// Identify lowered values that originated from f128, float and vXfXX arguments
+/// and record this.
 void MipsCCState::PreAnalyzeFormalArgumentsForF128(
     const SmallVectorImpl<ISD::InputArg> &Ins) {
   const MachineFunction &MF = getMachineFunction();
@@ -120,6 +157,7 @@ void MipsCCState::PreAnalyzeFormalArgume
     if (Ins[i].Flags.isSRet()) {
       OriginalArgWasF128.push_back(false);
       OriginalArgWasFloat.push_back(false);
+      OriginalArgWasFloatVector.push_back(false);
       continue;
     }
 
@@ -129,5 +167,10 @@ void MipsCCState::PreAnalyzeFormalArgume
     OriginalArgWasF128.push_back(
         originalTypeIsF128(FuncArg->getType(), nullptr));
     OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+
+    // The MIPS vector ABI has a quirk of sorts: if the first argument is
+    // actually an SRet pointer to a vector, then the next argument slot
+    // is $a2.
+    OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy());
   }
 }

Modified: llvm/trunk/lib/Target/Mips/MipsCCState.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsCCState.h?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsCCState.h (original)
+++ llvm/trunk/lib/Target/Mips/MipsCCState.h Fri Jun  9 09:37:08 2017
@@ -45,16 +45,33 @@ private:
                          const char *Func);
 
   /// Identify lowered values that originated from f128 arguments and record
-  /// this.
+  /// this for use by RetCC_MipsN.
   void
   PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
 
+  void
+  PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                     const Type *RetTy);
+
+  void PreAnalyzeFormalArgumentsForVectorFloat(
+      const SmallVectorImpl<ISD::InputArg> &Ins);
+
+  void
+  PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
   /// Records whether the value has been lowered from an f128.
   SmallVector<bool, 4> OriginalArgWasF128;
 
   /// Records whether the value has been lowered from float.
   SmallVector<bool, 4> OriginalArgWasFloat;
 
+  /// Records whether the value has been lowered from a floating point vector.
+  SmallVector<bool, 4> OriginalArgWasFloatVector;
+
+  /// Records whether the return value has been lowered from a floating point
+  /// vector.
+  SmallVector<bool, 4> OriginalRetWasFloatVector;
+
   /// Records whether the value was a fixed argument.
   /// See ISD::OutputArg::IsFixed,
   SmallVector<bool, 4> CallOperandIsFixed;
@@ -78,6 +95,7 @@ public:
     CCState::AnalyzeCallOperands(Outs, Fn);
     OriginalArgWasF128.clear();
     OriginalArgWasFloat.clear();
+    OriginalArgWasFloatVector.clear();
     CallOperandIsFixed.clear();
   }
 
@@ -96,31 +114,38 @@ public:
     CCState::AnalyzeFormalArguments(Ins, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
                          CCAssignFn Fn, const Type *RetTy,
                          const char *Func) {
     PreAnalyzeCallResultForF128(Ins, RetTy, Func);
+    PreAnalyzeCallResultForVectorFloat(Ins, RetTy);
     CCState::AnalyzeCallResult(Ins, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
                      CCAssignFn Fn) {
     PreAnalyzeReturnForF128(Outs);
+    PreAnalyzeReturnForVectorFloat(Outs);
     CCState::AnalyzeReturn(Outs, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
                    CCAssignFn Fn) {
     PreAnalyzeReturnForF128(ArgsFlags);
+    PreAnalyzeReturnForVectorFloat(ArgsFlags);
     bool Return = CCState::CheckReturn(ArgsFlags, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
     return Return;
   }
 
@@ -128,6 +153,12 @@ public:
   bool WasOriginalArgFloat(unsigned ValNo) {
       return OriginalArgWasFloat[ValNo];
   }
+  bool WasOriginalArgVectorFloat(unsigned ValNo) const {
+    return OriginalArgWasFloatVector[ValNo];
+  }
+  bool WasOriginalRetVectorFloat(unsigned ValNo) const {
+    return OriginalRetWasFloatVector[ValNo];
+  }
   bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
   SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
 };

Modified: llvm/trunk/lib/Target/Mips/MipsCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsCallingConv.td?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsCallingConv.td (original)
+++ llvm/trunk/lib/Target/Mips/MipsCallingConv.td Fri Jun  9 09:37:08 2017
@@ -37,6 +37,10 @@ class CCIfOrigArgWasF128<CCAction A>
 class CCIfArgIsVarArg<CCAction A>
     : CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>;
 
+/// Match if the original return type was not a floating point vector.
+class CCIfOrigArgWasNotVectorFloat<CCAction A>
+    : CCIf<"!static_cast<MipsCCState *>(&State)"
+                "->WasOriginalRetVectorFloat(ValNo)", A>;
 
 /// Match if the special calling conv is the specified value.
 class CCIfSpecialCallingConv<string CC, CCAction A>
@@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[
   // Promote i1/i8/i16 return values to i32.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
 
-  // i32 are returned in registers V0, V1, A0, A1
-  CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+  // i32 are returned in registers V0, V1, A0, A1, unless the original return
+  // type was a vector of floats.
+  CCIfOrigArgWasNotVectorFloat<CCIfType<[i32],
+                                        CCAssignToReg<[V0, V1, A0, A1]>>>,
 
   // f32 are returned in registers F0, F2
   CCIfType<[f32], CCAssignToReg<[F0, F2]>>,

Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Fri Jun  9 09:37:08 2017
@@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, ui
   return true;
 }
 
+// The MIPS MSA ABI passes vector arguments in the integer register set.
+// The number of integer registers used depends on the ABI.
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
+  if (VT.isVector() && Subtarget.hasMSA())
+    return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64;
+  return MipsTargetLowering::getRegisterType(VT);
+}
+
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+                                                      EVT VT) const {
+  if (VT.isVector()) {
+      if (Subtarget.isABI_O32()) {
+        return MVT::i32;
+      } else {
+        return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64;
+      }
+  }
+  return MipsTargetLowering::getRegisterType(Context, VT);
+}
+
+unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+                                                           EVT VT) const {
+  if (VT.isVector())
+    return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
+                    1U);
+  return MipsTargetLowering::getNumRegisters(Context, VT);
+}
+
+unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
+    LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+    unsigned &NumIntermediates, MVT &RegisterVT) const {
+
+  // Break down vector types to either 2 i64s or 4 i32s.
+  RegisterVT = getRegisterTypeForCallingConv(Context, VT);
+  IntermediateVT = RegisterVT;
+  NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
+                         ? VT.getVectorNumElements()
+                         : VT.getSizeInBits() / RegisterVT.getSizeInBits();
+
+  return NumIntermediates;
+}
+
 SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
   MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
   return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
@@ -2552,6 +2594,11 @@ SDValue MipsTargetLowering::lowerFP_TO_S
 //       yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
 //       not used, it must be shadowed. If only A3 is available, shadow it and
 //       go to stack.
+// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack.
+// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3}
+//         with the remainder spilled to the stack.
+// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases
+//         spilling the remainder to the stack.
 //
 //  For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
 //===----------------------------------------------------------------------===//
@@ -2563,8 +2610,13 @@ static bool CC_MipsO32(unsigned ValNo, M
       State.getMachineFunction().getSubtarget());
 
   static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+
+  const MipsCCState * MipsState = static_cast<MipsCCState *>(&State);
+
   static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 };
 
+  static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 };
+
   // Do not process byval args here.
   if (ArgFlags.isByVal())
     return true;
@@ -2602,8 +2654,26 @@ static bool CC_MipsO32(unsigned ValNo, M
                                 State.getFirstUnallocated(F32Regs) != ValNo;
   unsigned OrigAlign = ArgFlags.getOrigAlign();
   bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+  bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo);
 
-  if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+  // The MIPS vector ABI for floats passes them in a pair of registers
+  if (ValVT == MVT::i32 && isVectorFloat) {
+    // This is the start of a vector that was scalarized into an unknown number
+    // of components. It doesn't matter how many there are. Allocate one of the
+    // notional 8 byte aligned registers which map onto the argument stack, and
+    // shadow the register lost to alignment requirements.
+    if (ArgFlags.isSplit()) {
+      Reg = State.AllocateReg(FloatVectorIntRegs);
+      if (Reg == Mips::A2)
+        State.AllocateReg(Mips::A1);
+      else if (Reg == 0)
+        State.AllocateReg(Mips::A3);
+    } else {
+      // If we're an intermediate component of the split, we can just attempt to
+      // allocate a register directly.
+      Reg = State.AllocateReg(IntRegs);
+    }
+  } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
     Reg = State.AllocateReg(IntRegs);
     // If this is the first part of an i64 arg,
     // the allocated register must be either A0 or A2.

Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsISelLowering.h?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.h (original)
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.h Fri Jun  9 09:37:08 2017
@@ -248,6 +248,33 @@ namespace llvm {
     bool isCheapToSpeculateCttz() const override;
     bool isCheapToSpeculateCtlz() const override;
 
+    /// Return the register type for a given MVT, ensuring vectors are treated
+    /// as a series of gpr sized integers.
+    virtual MVT getRegisterTypeForCallingConv(MVT VT) const override;
+
+    /// Return the register type for a given MVT, ensuring vectors are treated
+    /// as a series of gpr sized integers.
+    virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+                                              EVT VT) const override;
+
+    /// Return the number of registers for a given MVT, ensuring vectors are
+    /// treated as a series of gpr sized integers.
+    virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                                   EVT VT) const override;
+
+    /// Break down vectors to the correct number of gpr sized integers.
+    virtual unsigned getVectorTypeBreakdownForCallingConv(
+        LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+        unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+    /// Return the correct alignment for the current calling convention.
+    virtual unsigned
+    getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override {
+      if (ArgTy->isVectorTy())
+        return std::min(DL.getABITypeAlignment(ArgTy), 8U);
+      return DL.getABITypeAlignment(ArgTy);
+    }
+
     ISD::NodeType getExtendForAtomicOps() const override {
       return ISD::SIGN_EXTEND;
     }

Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Fri Jun  9 09:37:08 2017
@@ -286,7 +286,9 @@ eliminateFrameIndex(MachineBasicBlock::i
 
   DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
                << "spOffset   : " << spOffset << "\n"
-               << "stackSize  : " << stackSize << "\n");
+               << "stackSize  : " << stackSize << "\n"
+               << "alignment  : "
+               << MF.getFrameInfo().getObjectAlignment(FrameIndex) << "\n");
 
   eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
 }

Added: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/cconv/vector.ll?rev=305083&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll (added)
+++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll Fri Jun  9 09:37:08 2017
@@ -0,0 +1,1657 @@
+; RUN: llc < %s -march=mips -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+
+
+
+; Test that vector types are passed through the integer register set whether or
+; not MSA is enabled. This is an ABI requirement for MIPS. For GCC compatibility
+; we need to handle any power of 2 number of elements. We will test this
+; exhaustively for combinations up to MSA register (128 bits) size.
+
+; First set of tests are for argument passing.
+
+define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
+; ALL-LABEL: i8_2:
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32EL: addu $1, $4, $5
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <2 x i8> %a, %b
+  ret <2 x i8> %1
+}
+
+; Test that vector spilled to the outgoing argument area have the expected
+; offset from $sp.
+
+define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d,
+                        <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) {
+entry:
+
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $7, 24
+
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $4, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $5, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $6, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $7, 65280
+
+; MIPS32-DAG: lbu ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 17($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 21($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 25($sp)
+
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $7, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 40($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 41($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 42($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 43($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 44($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 45($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 46($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 47($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 48($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 49($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 50($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 51($sp)
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $8, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $9, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $10, 48
+
+; MIPS64R5-DAG: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $5, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $6, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $7, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $8, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $9, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $10, {{[0-9]+}}($sp)
+
+  %0 = add <2 x i8> %a, %b
+  %1 = add <2 x i8> %0, %c
+  %2 = add <2 x i8> %1, %d
+  %3 = add <2 x i8> %2, %e
+  %4 = add <2 x i8> %3, %f
+  %5 = add <2 x i8> %4, %g
+  ret <2 x i8> %5
+}
+
+define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
+; ALL-LABEL: i8_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+  %1 = add <4 x i8> %a, %b
+  ret <4 x i8> %1
+}
+
+define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
+; ALL-LABEL: i8_8:
+; MIPS32-NOT: lw
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 8
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 8
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <8 x i8> %a, %b
+  ret <8 x i8> %1
+}
+
+define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) {
+; ALL-LABEL: i8_16:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <16 x i8> %a, %b
+
+  ret <16 x i8> %1
+}
+
+define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
+; ALL-LABEL: i16_2:
+; MIPS32: addu    $[[R0:[0-9]+]], $4, $5
+; MIPS32: andi    $[[R1:[0-9]+]], $[[R0]], 65535
+; MIPS32: srl     $[[R2:[0-9]+]], $5, 16
+; MIPS32: srl     $[[R3:[0-9]+]], $4, 16
+; MIPS32: addu    $[[R4:[0-9]+]], $[[R3]], $[[R2]]
+; MIPS32: sll     $2, $[[R4]], 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+  %1 = add <2 x i16> %a, %b
+  ret <2 x i16> %1
+}
+
+define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
+; ALL-LABEL: i16_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <4 x i16> %a, %b
+  ret <4 x i16> %1
+}
+
+define <8 x i16> @i16_8(<8 x i16> %a, <8 x i16> %b) {
+; ALL-LABEL: i16_8:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <8 x i16> %a, %b
+  ret <8 x i16> %1
+}
+
+define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
+; ALL-LABEL: i32_2:
+; MIPS32-DAG: addu    $2, $4, $6
+; MIPS32-DAG: addu    $3, $5, $7
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll     ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <2 x i32> %a, %b
+
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @i32_4(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: i32_4:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: sll     ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $7, 0
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $7, 32
+  %1 = add <4 x i32> %a, %b
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @i64_2(<2 x i64> %a, <2 x i64> %b) {
+; ALL-LABEL: i64_2:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: daddu $2, $4, $6
+; MIPS64-DAG: daddu $3, $5, $7
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <2 x i64> %a, %b
+  ret <2 x i64> %1
+}
+
+; The MIPS vector ABI treats vectors of floats differently to vectors of
+; integers.
+
+; For arguments, floating point vectors are bitcast to integer vectors whose
+; elements are GPR sized; the element count is the size of the floating point
+; vector divided by the size of a GPR.
+
+; For returns, integer vectors are returned in GPRs, but floating point
+; vectors are returned via a hidden sret pointer.
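+
+; For example, under this scheme a <4 x float> argument is passed as if it
+; were <4 x i32> (one element per GPR: $4-$7) under O32, and as <2 x i64>
+; (two 64-bit GPRs: $4/$5) under N64, which is what float_4 below checks.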
+
+; For testing purposes we skip returning values here and test them below
+; instead.
+@float_res_v2f32 = external global <2 x float>
+
+define void @float_2(<2 x float> %a, <2 x float> %b) {
+; ALL-LABEL: float_2:
+; MIPS32: mtc1 $7, $f[[F0:[0-9]+]]
+; MIPS32: mtc1 $5, $f[[F1:[0-9]+]]
+; MIPS32: add.s $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]]
+; MIPS32: swc1 $f[[F2]]
+; MIPS32: mtc1 $6, $f[[F3:[0-9]+]]
+; MIPS32: mtc1 $4, $f[[F4:[0-9]+]]
+; MIPS32: add.s $f[[F5:[0-9]+]], $f[[F4]], $f[[F3]]
+; MIPS32: swc1 $f[[F5]]
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = fadd <2 x float> %a, %b
+  store <2 x float> %1, <2 x float> * @float_res_v2f32
+  ret void
+}
+
+@float_res_v4f32 = external global <4 x float>
+
+; For MSA this case is suboptimal: the four loads from the stack could be
+; combined into a single ld.w.
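+; (That is, a single MSA load of the 16-byte slot at 16($sp) in place of the
+; four lw / insert.w pairs checked below; callfloat_4 later in this file does
+; get a single ld.w for its call result.)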
+
+define void @float_4(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: float_4:
+; MIPS32-DAG: mtc1 $4
+; MIPS32-DAG: mtc1 $5
+; MIPS32-DAG: mtc1 $6
+; MIPS32-DAG: mtc1 $7
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $6, 0
+; MIPS64-DAG: sll $[[R7:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R7]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R8:[0-9]+]], $6, 32
+; MIPS64-DAG: dsrl $[[R9:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R10:[0-9]+]], $[[R8]], 0
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R9]], 0
+; MIPS64-DAG: mtc1 $[[R10]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = fadd <4 x float> %a, %b
+  store <4 x float> %1, <4 x float> * @float_res_v4f32
+  ret void
+}
+
+@double_v2f64 = external global <2 x double>
+
+define void @double_2(<2 x double> %a, <2 x double> %b) {
+; ALL-LABEL: double_2:
+; MIPS32-DAG: sw $7
+; MIPS32-DAG: sw $6
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32:     add.d
+; MIPS32-DAG: sw $5
+; MIPS32-DAG: sw $4
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32:     add.d
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: dmtc1 $6, $f[[R0:[0-9]+]]
+; MIPS64-DAG: dmtc1 $4, $f[[R1:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R2:[0-9]+]], $f[[R1]], $f[[R0]]
+; MIPS64-DAG: dmtc1 $7, $f[[R3:[0-9]+]]
+; MIPS64-DAG: dmtc1 $5, $f[[R4:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R5:[0-9]+]], $f[[R4]], $f[[R3]]
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = fadd <2 x double> %a, %b
+  store <2 x double> %1, <2 x double> * @double_v2f64
+  ret void
+}
+
+; Return value testing.
+; Integer vectors are returned in $2, $3, $4 and $5 for O32, and in $2 and $3 for N32/N64.
+; Floating point vectors are returned through a hidden sret pointer.
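+; For example, ret_4_i32 below returns its <4 x i32> in $2-$5 under O32 and
+; in $2/$3 under N64 (two elements per 64-bit GPR), while under O32
+; ret_float_2 writes its <2 x float> result through the pointer it receives
+; in $4.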
+
+@gv2i8 = global <2 x i8> <i8 1, i8 2>
+@gv4i8 = global <4 x i8> <i8 0, i8 1, i8 2, i8 3>
+@gv8i8 = global <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+@gv16i8 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+@gv2i16 = global <2 x i16> <i16 1, i16 2>
+@gv4i16 = global <4 x i16> <i16 0, i16 1, i16 2, i16 3>
+@gv8i16 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+
+@gv2i32 = global <2 x i32> <i32 0, i32 1>
+@gv4i32 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+@gv2i64 = global <2 x i64> <i64 0, i64 1>
+
+define <2 x i8> @ret_2_i8() {
+; ALL-LABEL: ret_2_i8:
+; MIPS32-DAG:   lhu $2
+; MIPS32R5-DAG: lhu $2
+
+; FIXME: why is this lh instead of lhu on mips64?
+
+; MIPS64-DAG:  lh $2
+; MIPS64R5-DAG: lh $2
+  %1 = load <2 x i8>, <2 x i8> * @gv2i8
+  ret <2 x i8> %1
+}
+
+define <4 x i8> @ret_4_i8() {
+; ALL-LABEL: ret_4_i8:
+; MIPS32-DAG:   lw $2
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG:   lw $2
+; MIPS64R5-DAG: lw $2
+
+  %1 = load <4 x i8>, <4 x i8> * @gv4i8
+  ret <4 x i8> %1
+}
+
+define <8 x i8> @ret_8_i8() {
+; ALL-LABEL: ret_8_i8:
+; MIPS32-DAG:   lw $2
+; MIPS32-DAG:   lw $3
+
+; MIPS32R5: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+  %1 = load <8 x i8>, <8 x i8> * @gv8i8
+  ret <8 x i8> %1
+}
+
+define <16 x i8> @ret_16_i8() {
+; ALL-LABEL: ret_16_i8:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+  %1 = load <16 x i8>, <16 x i8> * @gv16i8
+  ret <16 x i8> %1
+}
+
+define <2 x i16> @ret_2_i16() {
+; ALL-LABEL: ret_2_i16:
+; MIPS32-DAG:   lw $2
+
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG:   lw $2
+
+; MIPS64R5-DAG: lw $2
+  %1 = load <2 x i16>, <2 x i16> * @gv2i16
+  ret <2 x i16> %1
+}
+
+define <4 x i16> @ret_4_i16() {
+; ALL-LABEL: ret_4_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+  %1 = load <4 x i16>, <4 x i16> * @gv4i16
+  ret <4 x i16> %1
+}
+
+define <8 x i16> @ret_8_i16() {
+; ALL-LABEL: ret_8_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+  %1 = load <8 x i16>, <8 x i16> * @gv8i16
+  ret <8 x i16> %1
+}
+
+define <2 x i32> @ret_2_i32() {
+; ALL-LABEL: ret_2_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+
+  %1 = load <2 x i32>, <2 x i32> * @gv2i32
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @ret_4_i32() {
+; ALL-LABEL: ret_4_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+  %1 = load <4 x i32>, <4 x i32> * @gv4i32
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @ret_2_i64() {
+; ALL-LABEL: ret_2_i64:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+  %1 = load <2 x i64>, <2 x i64> * @gv2i64
+  ret <2 x i64> %1
+}
+
+@gv2f32 = global <2 x float> <float 0.0, float 0.0>
+@gv4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
+
+define <2 x float> @ret_float_2() {
+entry:
+; ALL-LABEL: ret_float_2:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS64: ld $2
+
+; MIPS64R5: ld $2
+
+  %0 = load <2 x float>, <2 x float> * @gv2f32
+  ret <2 x float> %0
+}
+
+define <4 x float> @ret_float_4() {
+entry:
+; ALL-LABEL: ret_float_4:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: st.w $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+  %0 = load <4 x float>, <4 x float> * @gv4f32
+  ret <4 x float> %0
+}
+
+@gv2f64 = global <2 x double> <double 0.0, double 0.0>
+
+define <2 x double> @ret_double_2() {
+entry:
+; ALL-LABEL: ret_double_2:
+
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 0($4)
+
+; MIPS32R5: st.d $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+  %0 = load <2 x double>, <2 x double> * @gv2f64
+  ret <2 x double> %0
+}
+
+; Test argument lowering and call result lowering.
+
+define void @call_i8_2() {
+entry:
+; ALL-LABEL: call_i8_2:
+; MIPS32EB-DAG: addiu $4
+; MIPS32EB-DAG: addiu $5
+; MIPS32-NOT: addiu $6
+; MIPS32-NOT: addiu $7
+
+; MIPS32R5-DAG: lhu $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lhu $5, {{[0-9]+}}($sp)
+
+; MIPS32R5: jal
+; MIPS32R5: sw $2, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}})
+; MIPS32R5-DAG: sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
+
+; MIPS64EB: daddiu $4, $zero, 1543
+; MIPS64EB: daddiu $5, $zero, 3080
+
+; MIPS64EL: daddiu $4, $zero, 1798
+; MIPS64EL: daddiu $5, $zero, 2060
+
+; MIPS64R5-DAG: lh $4
+; MIPS64R5-DAG: lh $5
+
+; MIPS32: jal i8_2
+; MIPS64: jalr $25
+
+; MIPS32EB-DAG: srl $[[R0:[0-9]+]], $2, 16
+; MIPS32EB-DAG: sb $[[R0]]
+; MIPS32EB-DAG: srl $[[R1:[0-9]+]], $2, 24
+; MIPS32EB-DAG: sb $[[R1]]
+
+; MIPS32EL: sb $2
+; MIPS32EL: srl $[[R0:[0-9]+]], $2, 8
+; MIPS32EL: sb $[[R0]]
+
+; MIPS64EB: dsrl $[[R4:[0-9]+]], $2, 48
+; MIPS64EB: sb $[[R4]]
+; MIPS64EB: dsrl $[[R5:[0-9]+]], $2, 56
+; MIPS64EB: sb $[[R5]]
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $2, 0
+; MIPS64EL: sb $[[R6]]
+; MIPS64EL: srl $[[R7:[0-9]+]], $[[R6]], 8
+; MIPS64EL: sb $[[R7]]
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x i8> @i8_2(<2 x i8> <i8 6, i8 7>, <2 x i8> <i8 12, i8 8>)
+  store <2 x i8> %0, <2 x i8> * @gv2i8
+  ret void
+}
+
+define void @call_i8_4() {
+entry:
+; ALL-LABEL: call_i8_4:
+; MIPS32: ori $4
+; MIPS32: ori $5
+; MIPS32-NOT: ori $6
+; MIPS32-NOT: ori $7
+
+; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp)
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5: lw $4
+; MIPS64R5: lw $5
+
+; MIPS32: jal i8_4
+; MIPS64: jalr $25
+
+; MIPS32: sw $2
+
+; MIPS32R5-DAG: sw $2
+
+; MIPS64: sw $2
+; MIPS64R5: sw $2
+
+  %0 = call <4 x i8> @i8_4(<4 x i8> <i8 6, i8 7, i8 9, i8 10>, <4 x i8> <i8 12, i8 8, i8 9, i8 10>)
+  store <4 x i8> %0, <4 x i8> * @gv4i8
+  ret void
+}
+
+define void @call_i8_8() {
+entry:
+; ALL-LABEL: call_i8_8:
+
+; MIPS32: ori $6
+; MIPS32: ori $4
+; MIPS32: move  $5
+; MIPS32: move  $7
+
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: move  $5
+; MIPS32R5-DAG: move  $7
+
+; MIPS64EB: daddiu $4, ${{[0-9]+}}, 2314
+; MIPS64EB: daddiu $5, ${{[0-9]+}}, 2314
+
+; MIPS64EL: daddiu $4, ${{[0-9]+}}, 1798
+; MIPS64EL: daddiu $5, ${{[0-9]+}}, 2060
+
+; MIPS32: jal i8_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2
+; MIPS32-DAG: sw $3
+
+; MIPS32R5-DAG: sw $2
+; MIPS32R5-DAG: sw $3
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+  %0 = call <8 x i8> @i8_8(<8 x i8> <i8 6, i8 7, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>, <8 x i8> <i8 12, i8 8, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>)
+  store <8 x i8> %0, <8 x i8> * @gv8i8
+  ret void
+}
+
+define void @calli8_16() {
+entry:
+; ALL-LABEL: calli8_16:
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: ori $7, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: move  $5, ${{[0-9]+}}
+; MIPS32: move  $6, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $7
+
+; MIPS64R5-DAG: copy_s.d $4
+; MIPS64R5-DAG: copy_s.d $5
+; MIPS64R5-DAG: copy_s.d $6
+; MIPS64R5-DAG: copy_s.d $7
+
+; MIPS32: jal i8_16
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv16i8)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <16 x i8> @i8_16(<16 x i8> <i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7, i8 6, i8 7, i8 9, i8 10>, <16 x i8> <i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 12, i8 8, i8 9, i8 10>)
+  store <16 x i8> %0, <16 x i8> * @gv16i8
+  ret void
+}
+
+define void @calli16_2() {
+entry:
+; ALL-LABEL: calli16_2:
+
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+
+; MIPS32R5-DAG: lw $4
+; MIPS32R5-DAG: lw $5
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5-DAG: lw $4
+; MIPS64R5-DAG: lw $5
+
+; MIPS32: jal i16_2
+; MIPS64: jalr $25
+
+; MIPS32: sw $2, %lo(gv2i16)
+
+; MIPS32R5: sw $2, %lo(gv2i16)
+
+; MIPS64: sw $2
+
+; MIPS64R5: sw $2
+
+  %0 = call <2 x i16> @i16_2(<2 x i16> <i16 6, i16 7>, <2 x i16> <i16 12, i16 8>)
+  store <2 x i16> %0, <2 x i16> * @gv2i16
+  ret void
+}
+
+define void @calli16_4() {
+entry:
+; ALL-LABEL: calli16_4:
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+; MIPS32-DAG: ori $6
+; MIPS32-DAG: move $7
+
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: ori $5
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: move $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i16_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32R5-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+  %0 = call <4 x i16> @i16_4(<4 x i16> <i16 6, i16 7, i16 9, i16 10>, <4 x i16> <i16 12, i16 8, i16 9, i16 10>)
+  store <4 x i16> %0, <4 x i16> * @gv4i16
+  ret void
+}
+
+define void @calli16_8() {
+entry:
+; ALL-LABEL: calli16_8:
+
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: ori $5, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: move  $6, ${{[0-9]+}}
+; MIPS32-DAG: move  $7, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $7
+; MIPS64-DAG: move $5
+; MIPS64-DAG: move $6
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i16_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv8i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64: sd $3
+; MIPS64: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W2:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W2]][1], $3
+
+  %0 = call <8 x i16> @i16_8(<8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 6, i16 7, i16 9, i16 10>, <8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 12, i16 8, i16 9, i16 10>)
+  store <8 x i16> %0, <8 x i16> * @gv8i16
+  ret void
+}
+
+define void @calli32_2() {
+entry:
+; ALL-LABEL: calli32_2:
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i32_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x i32> @i32_2(<2 x i32> <i32 6, i32 7>, <2 x i32> <i32 12, i32 8>)
+  store <2 x i32> %0, <2 x i32> * @gv2i32
+  ret void
+}
+
+define void @calli32_4() {
+entry:
+; ALL-LABEL: calli32_4:
+
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: move $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i32_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0]][1], $3
+
+  %0 = call <4 x i32> @i32_4(<4 x i32> <i32 6, i32 7, i32 9, i32 10>, <4 x i32> <i32 12, i32 8, i32 9, i32 10>)
+  store <4 x i32> %0, <4 x i32> * @gv4i32
+  ret void
+}
+
+define void @calli64_2() {
+entry:
+; ALL-LABEL: calli64_2:
+
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+; MIPS64: daddiu $6
+; MIPS64: daddiu $7
+
+; MIPS64R5: daddiu $4
+; MIPS64R5: daddiu $5
+; MIPS64R5: daddiu $6
+; MIPS64R5: daddiu $7
+
+; MIPS32: jal i64_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv2i64)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0]][1], $3
+
+  %0 = call <2 x i64> @i64_2(<2 x i64> <i64 6, i64 7>, <2 x i64> <i64 12, i64 8>)
+  store <2 x i64> %0, <2 x i64> * @gv2i64
+  ret void
+}
+
+declare <2 x float> @float2_extern(<2 x float>, <2 x float>)
+declare <4 x float> @float4_extern(<4 x float>, <4 x float>)
+declare <2 x double> @double2_extern(<2 x double>, <2 x double>)
+
+define void @callfloat_2() {
+entry:
+; ALL-LABEL: callfloat_2:
+
+; MIPS32-DAG: addiu $4, $sp, 24
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: lui $7
+
+; MIPS32R5-DAG: addiu $4, $sp, 24
+; MIPS32R5-DAG: addiu $6, $zero, 0
+; MIPS32R5-DAG: lui $7
+
+; MIPS64: dsll $4
+; MIPS64: dsll $5
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+
+; MIPS32: jal float2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32R5-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32R5-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
+  store <2 x float> %0, <2 x float> * @gv2f32
+  ret void
+}
+
+define void @callfloat_4() {
+entry:
+; ALL-LABEL: callfloat_4:
+
+; MIPS32: sw ${{[0-9]+}}, 36($sp)
+; MIPS32: sw ${{[0-9]+}}, 32($sp)
+; MIPS32: sw ${{[0-9]+}}, 28($sp)
+; MIPS32: sw ${{[0-9]+}}, 24($sp)
+; MIPS32: sw ${{[0-9]+}}, 20($sp)
+; MIPS32: sw ${{[0-9]+}}, 16($sp)
+; MIPS32: addiu $4, $sp, 48
+; MIPS32: addiu $6, $zero, 0
+; MIPS32: lui $7
+
+; MIPS32R5: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5: copy_s.w $7, $w{{[0-9]+}}
+; MIPS32R5: sw ${{[0-9]+}}, 36($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 32($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 16($sp)
+; MIPS32R5: addiu $4, $sp, 48
+
+; MIPS64-DAG: dsll $4
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS64: jalr $25
+; MIPS32: jal
+
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 52($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 56($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 60($sp)
+
+; MIPS32R5: ld.w $w{{[0-9]+}}, 48($sp)
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <4 x float> @float4_extern(<4 x float> <float 0.0, float -1.0, float 2.0, float 4.0>, <4 x float> <float 12.0, float 14.0, float 15.0, float 16.0>)
+  store <4 x float> %0, <4 x float> * @gv4f32
+  ret void
+}
+
+define void @calldouble_2() {
+entry:
+; ALL-LABEL: calldouble_2:
+
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 36($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 32($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4, $sp, [[R0:[0-9]+]]
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: addiu $7, $zero, 0
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 36($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 32($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+; MIPS64-DAG: daddiu $4
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS32: jal double2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: ldc1 $f[[F0:[0-9]+]], 48($sp)
+; MIPS32-DAG: ldc1 $f[[F1:[0-9]+]], 56($sp)
+
+; MIPS32-DAG: sdc1 $f[[F1]], 8(${{[0-9]+}})
+; MIPS32-DAG: sdc1 $f[[F0]], %lo(gv2f64)(${{[0-9]+}})
+
+; MIPS32R5: ld.d $w[[W0:[0-9]+]], 48($sp)
+; MIPS32R5: st.d $w[[W0]], 0(${{[0-9]+}})
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <2 x double> @double2_extern(<2 x double> <double 0.0, double -1.0>, <2 x double> <double 12.0, double 14.0>)
+  store <2 x double> %0, <2 x double> * @gv2f64
+  ret void
+}
+
+; The mixed tests show that, due to the alignment requirements of the vector
+; arguments, a GPR can be skipped and left unused during argument passing.
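+;
+; Under O32, mixed_i8 passes %a in $4/$5 and %b in $6, but %c requires 8-byte
+; alignment, so $7 is skipped and %c is loaded from 16($sp)/20($sp). In
+; mixed_32 the sret pointer occupies $4, $5 is skipped, and %a is passed in
+; $6/$7 plus two stack slots.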
+
+define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) {
+entry:
+; ALL-LABEL: mixed_i8:
+
+; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}}
+; MIPS32: andi $[[R7:[0-9]+]], $6, 255
+; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp)
+; MIPS32-DAG: add.s $f0, $f{{[0-9]+}}, $f{{[0-9]+}}
+
+; MIPS32R5: andi $[[R0:[0-9]+]], $6, 255
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+
+; MIPS64EB-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EB-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EB-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EB-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EB-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64EB-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EB-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EB-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EL-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EL-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EL-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EL-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EL-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EL-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+
+; MIPS64R5: sll $[[R0:[0-9]+]], $5, 0
+; MIPS64R5: andi $[[R1:[0-9]+]], $[[R0]], 255
+; MIPS64R5: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5: sd $6, {{[0-9]+}}($sp)
+
+  %0 = zext i8 %b to i32
+  %1 = uitofp i32 %0 to float
+  %2 = insertelement <2 x float> undef, float %1, i32 0
+  %3 = insertelement <2 x float> %2, float %1, i32 1
+  %4 = fadd <2 x float> %3, %a
+  %5 = fadd <2 x float> %4, %c
+  %6 = extractelement <2 x float> %5, i32 0
+  %7 = extractelement <2 x float> %5, i32 1
+  %8 = fadd float %6, %7
+  ret float %8
+}
+
+define <4 x float> @mixed_32(<4 x float> %a, i32 %b) {
+entry:
+; ALL-LABEL: mixed_32:
+
+; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}}
+; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][1], $7
+; MIPS32R5: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][2], $[[R0]]
+; MIPS32R5: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][3], $[[R1]]
+; MIPS32R5: lw $[[R0:[0-9]+]], 24($sp)
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: dsrl $[[R0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $[[R0]], 0
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $4, 0
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+; MIPS64-DAG: sll	$[[R6:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R6:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64R5: insert.d $w[[W0:[0-9]+]][0], $4
+; MIPS64R5: insert.d $w[[W0]][1], $5
+; MIPS64R5: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64R5: fill.w $w{{[0-9]+}}, $[[R0]]
+
+  %0 = uitofp i32 %b to float
+  %1 = insertelement <4 x float> undef, float %0, i32 0
+  %2 = insertelement <4 x float> %1, float %0, i32 1
+  %3 = insertelement <4 x float> %2, float %0, i32 2
+  %4 = insertelement <4 x float> %3, float %0, i32 3
+  %5 = fadd <4 x float> %4, %a
+  ret <4 x float> %5
+}
+
+
+; This test is slightly more fragile than I'd like as the offset into the
+; outgoing arguments area is dependent on the size of the stack frame for
+; this function.
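+;
+; Concretely: the two stack-passed elements arrive at 16($sp) and 20($sp) on
+; entry, and after this function allocates its 32-byte frame
+; (addiu $sp, $sp, -32) they are addressed as 48($sp) and 52($sp) in the
+; MIPS32 checks below, so any change in frame size changes those offsets.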
+
+define <4 x float> @cast(<4 x i32> %a) {
+entry:
+; ALL-LABEL: cast:
+
+; MIPS32: addiu $sp, $sp, -32
+; MIPS32-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32-DAG: sw $7, {{[0-9]+}}($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp)
+
+; MIPS32R5-DAG: insert.w  $w0[0], $6
+; MIPS32R5-DAG: insert.w  $w0[1], $7
+; MIPS32R5-DAG: lw  $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w  $w0[2], $[[R0]]
+; MIPS32R5-DAG: lw  $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w  $w0[3], $[[R1]]
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d  $w0[0], $4
+; MIPS64R5-DAG: insert.d  $w0[1], $5
+
+  %0 = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %0
+}
+
+define <4 x float> @select(<4 x i32> %cond, <4 x float> %arg1, <4 x float> %arg2) {
+entry:
+; ALL-LABEL: select:
+
+; MIPS32-DAG: andi ${{[0-9]+}}, $7, 1
+; MIPS32-DAG: andi ${{[0-9]+}}, $6, 1
+; MIPS32-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1
+; MIPS32-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R1]], 1
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $7
+; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R0]]
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R1]]
+; MIPS32R5-DAG: slli.w $w{{[0-9]}}, $w[[W0]]
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R1:[0-9]+]], $6, 32
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R3:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R4:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $8, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R7:[0-9]+]], $8, 32
+; MIPS64-DAG: sll $[[R8:[0-9]+]], $[[R7]], 0
+; MIPS64-DAG: mtc1 $[[R8]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R9:[0-9]+]], $9, 0
+; MIPS64-DAG: mtc1 $[[R9]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R10:[0-9]+]], $9, 32
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R10]], 0
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R12:[0-9]+]], $4, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R12]], 1
+; MIPS64-DAG: dsrl $[[R13:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R14:[0-9]+]], $[[R13]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R14]], 1
+
+; MIPS64-DAG: sll $[[R15:[0-9]+]], $5, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R15]], 1
+; MIPS64-DAG: dsrl $[[R16:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R17:[0-9]+]], $[[R16]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R17]], 1
+
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $8
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $9
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $6
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $7
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $4
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $5
+
+  %cond.t = trunc <4 x i32> %cond to <4 x i1>
+  %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2
+  ret <4 x float> %res
+}

Modified: llvm/trunk/test/CodeGen/Mips/ctlz-v.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/ctlz-v.ll?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/ctlz-v.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/ctlz-v.ll Fri Jun  9 09:37:08 2017
@@ -8,10 +8,14 @@ entry:
 ; MIPS32: clz     $2, $4
 ; MIPS32: clz     $3, $5
 
-; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0
-; MIPS64-DAG: clz $2, $[[A0]]
-; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: clz $3, $[[A1]]
+; MIPS64-DAG: dsrl $[[A0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[A1:[0-9]+]], $[[A0]], 0
+; MIPS64-DAG: clz $[[R0:[0-9]+]], $[[A1]]
+; MIPS64-DAG: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; MIPS64-DAG: sll $[[A2:[0-9]+]], $4, 0
+; MIPS64-DAG: clz $[[R2:[0-9]+]], $[[A2]]
+; MIPS64-DAG: dext $[[R3:[0-9]+]], $[[R2]], 0, 32
+; MIPS64-DAG: or $2, $[[R3]], $[[R1]]
 
   %ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
   ret <2 x i32> %ret

Modified: llvm/trunk/test/CodeGen/Mips/cttz-v.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/cttz-v.ll?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/cttz-v.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/cttz-v.ll Fri Jun  9 09:37:08 2017
@@ -24,14 +24,17 @@ entry:
 ; MIPS64-DAG: and     $[[R2:[0-9]+]], $[[R1]], $[[R0]]
 ; MIPS64-DAG: clz     $[[R3:[0-9]+]], $[[R2]]
 ; MIPS64-DAG: addiu   $[[R4:[0-9]+]], $zero, 32
-; MIPS64-DAG: subu    $2, $[[R4]], $[[R3]]
-; MIPS64-DAG: sll     $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: addiu   $[[R5:[0-9]+]], $[[A1]], -1
-; MIPS64-DAG: not     $[[R6:[0-9]+]], $[[A1]]
-; MIPS64-DAG: and     $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; MIPS64-DAG: clz     $[[R8:[0-9]+]], $[[R7]]
-; MIPS64-DAG: jr      $ra
-; MIPS64-DAG: subu    $3, $[[R4]], $[[R8]]
+; MIPS64-DAG: subu    $[[R5:[0-9]+]], $[[R4]], $[[R3]]
+; MIPS64-DAG: dsrl    $[[R6:[0-9]+]], $4, 32
+; MIPS64-DAG: sll     $[[R7:[0-9]+]], $[[R6]], 0
+; MIPS64-DAG: dext    $[[R8:[0-9]+]], $[[R5]], 0, 32
+; MIPS64-DAG: addiu   $[[R9:[0-9]+]], $[[R7]], -1
+; MIPS64-DAG: not     $[[R10:[0-9]+]], $[[R7]]
+; MIPS64-DAG: and     $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; MIPS64-DAG: clz     $[[R12:[0-9]+]], $[[R11]]
+; MIPS64-DAG: subu    $[[R13:[0-9]+]], $[[R4]], $[[R12]]
+; MIPS64-DAG: dsll    $[[R14:[0-9]+]], $[[R13]], 32
+; MIPS64-DAG: or      $2, $[[R8]], $[[R14]]
 
   %ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true)
   ret <2 x i32> %ret

Modified: llvm/trunk/test/CodeGen/Mips/return-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/return-vector.ll?rev=305083&r1=305082&r2=305083&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/return-vector.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/return-vector.ll Fri Jun  9 09:37:08 2017
@@ -128,8 +128,11 @@ entry:
 
 ; CHECK-LABEL:        call_f2:
 ; CHECK:        call16(f2)
-; CHECK-NOT:    lwc1
-; CHECK:        add.s    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
+; CHECK:        addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG:    lwc1 $f[[F0:[0-9]]], [[O0]]($sp)
+; CHECK-DAG:    lwc1 $f[[F1:[0-9]]], 20($sp)
+; CHECK:        add.s    $f0, $f[[F0]], $f[[F1]]
+
 }
 
 
@@ -143,11 +146,12 @@ entry:
 
 ; CHECK-LABEL:        call_d2:
 ; CHECK:        call16(d2)
-; CHECK-NOT:    ldc1
-; CHECK:        add.d    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
-}
-
+; CHECK:        addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG:    ldc1 $f[[F0:[0-9]+]], 24($sp)
+; CHECK-DAG:    ldc1 $f[[F1:[0-9]+]], [[O0]]($sp)
+; CHECK:        add.d    $f0, $f[[F1]], $f[[F0]]
 
+}
 
 ; Check that function returns vector on stack in cases when vector can't be
 ; returned in registers. Also check that vector is placed on stack starting
@@ -179,11 +183,12 @@ entry:
   ret <4 x float> %vecins4
 
 ; CHECK-LABEL:        return_f4:
-; CHECK-DAG:    lwc1    $[[R0:[a-z0-9]+]], 16($sp)
-; CHECK-DAG:    swc1    $[[R0]], 12($4)
+; CHECK-DAG:    lwc1    $f[[R0:[0-9]+]], 16($sp)
+; CHECK-DAG:    swc1    $f[[R0]], 12($4)
 ; CHECK-DAG:    sw      $7, 8($4)
 ; CHECK-DAG:    sw      $6, 4($4)
 ; CHECK-DAG:    sw      $5, 0($4)
+
 }
 
 
@@ -227,8 +232,8 @@ entry:
   ret <2 x float> %vecins2
 
 ; CHECK-LABEL:        return_f2:
-; CHECK:        mov.s   $f0, $f12
-; CHECK:        mov.s   $f2, $f14
+; CHECK-DAG:    sw   $5, 0($4)
+; CHECK-DAG:    sw   $6, 4($4)
 }
 
 
@@ -239,6 +244,10 @@ entry:
   ret <2 x double> %vecins2
 
 ; CHECK-LABEL:        return_d2:
-; CHECK:        mov.d   $f0, $f12
-; CHECK:        mov.d   $f2, $f14
+; CHECK-DAG:    ldc1 $f[[F0:[0-9]]], 16($sp)
+; CHECK-DAG:    sdc1 $f[[F0]], 8($4)
+; CHECK-DAG:    mtc1 $6, $f[[F1:[0-9]+]]
+; CHECK-DAG:    mtc1 $7, $f
+; CHECK-DAG:    sdc1 $f[[F0]], 0($4)
+
 }



