[llvm] r283190 - [Power9] Part-word VSX integer scalar loads/stores and sign extend instructions

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 3 23:59:24 PDT 2016


Author: nemanjai
Date: Tue Oct  4 01:59:23 2016
New Revision: 283190

URL: http://llvm.org/viewvc/llvm-project?rev=283190&view=rev
Log:
[Power9] Part-word VSX integer scalar loads/stores and sign extend instructions

This patch corresponds to review:
https://reviews.llvm.org/D23155

This patch removes the VSHRC register class (based on D20310) and adds
exploitation of the Power9 sub-word integer loads into VSX registers as well
as vector sign extensions.
The new instructions are useful for a few purposes:

    Int to FP conversions of 1 or 2-byte values loaded from memory
    Building vectors of 1 or 2-byte integers with values loaded from memory
    Storing individual 1 or 2-byte elements from integer vectors

This patch implements all of those uses.

Added:
    llvm/trunk/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx-vec-spill.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
    llvm/trunk/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
    llvm/trunk/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
    llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
    llvm/trunk/lib/Target/PowerPC/PPCAsmPrinter.cpp
    llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
    llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
    llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
    llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.td
    llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp
    llvm/trunk/test/CodeGen/PowerPC/inline-asm-scalar-to-vector-error.ll
    llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll
    llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll
    llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
    llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll
    llvm/trunk/test/CodeGen/PowerPC/ppc64-i128-abi.ll
    llvm/trunk/test/CodeGen/PowerPC/select-i1-vs-i1.ll
    llvm/trunk/test/CodeGen/PowerPC/sjlj.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx-args.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx-infl-copy1.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx-p8.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx-spill-norwstore.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx.ll

Modified: llvm/trunk/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp Tue Oct  4 01:59:23 2016
@@ -83,6 +83,16 @@ static const MCPhysReg FRegs[32] = {
   PPC::F24, PPC::F25, PPC::F26, PPC::F27,
   PPC::F28, PPC::F29, PPC::F30, PPC::F31
 };
+static const MCPhysReg VFRegs[32] = {
+  PPC::VF0,  PPC::VF1,  PPC::VF2,  PPC::VF3,
+  PPC::VF4,  PPC::VF5,  PPC::VF6,  PPC::VF7,
+  PPC::VF8,  PPC::VF9,  PPC::VF10, PPC::VF11,
+  PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+  PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+  PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+  PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+  PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
 static const MCPhysReg VRegs[32] = {
   PPC::V0,  PPC::V1,  PPC::V2,  PPC::V3,
   PPC::V4,  PPC::V5,  PPC::V6,  PPC::V7,
@@ -103,14 +113,14 @@ static const MCPhysReg VSRegs[64] = {
   PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
   PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
 
-  PPC::VSH0,  PPC::VSH1,  PPC::VSH2,  PPC::VSH3,
-  PPC::VSH4,  PPC::VSH5,  PPC::VSH6,  PPC::VSH7,
-  PPC::VSH8,  PPC::VSH9,  PPC::VSH10, PPC::VSH11,
-  PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
-  PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
-  PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
-  PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
-  PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+  PPC::V0,  PPC::V1,  PPC::V2,  PPC::V3,
+  PPC::V4,  PPC::V5,  PPC::V6,  PPC::V7,
+  PPC::V8,  PPC::V9,  PPC::V10, PPC::V11,
+  PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+  PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+  PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+  PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+  PPC::V28, PPC::V29, PPC::V30, PPC::V31
 };
 static const MCPhysReg VSFRegs[64] = {
   PPC::F0,  PPC::F1,  PPC::F2,  PPC::F3,
@@ -597,6 +607,11 @@ public:
     Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
   }
 
+  void addRegVFRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(VFRegs[getReg()]));
+  }
+
   void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(VRegs[getReg()]));

Modified: llvm/trunk/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp Tue Oct  4 01:59:23 2016
@@ -89,6 +89,17 @@ static const unsigned FRegs[] = {
   PPC::F28, PPC::F29, PPC::F30, PPC::F31
 };
 
+static const unsigned VFRegs[] = {
+  PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+  PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+  PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+  PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+  PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+  PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+  PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+  PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
 static const unsigned VRegs[] = {
   PPC::V0, PPC::V1, PPC::V2, PPC::V3,
   PPC::V4, PPC::V5, PPC::V6, PPC::V7,
@@ -110,14 +121,14 @@ static const unsigned VSRegs[] = {
   PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
   PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
 
-  PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
-  PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
-  PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
-  PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
-  PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
-  PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
-  PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
-  PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+  PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+  PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+  PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+  PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+  PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+  PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+  PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+  PPC::V28, PPC::V29, PPC::V30, PPC::V31
 };
 
 static const unsigned VSFRegs[] = {
@@ -242,6 +253,12 @@ static DecodeStatus DecodeF8RCRegisterCl
   return decodeRegisterClass(Inst, RegNo, FRegs);
 }
 
+static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder) {
+  return decodeRegisterClass(Inst, RegNo, VFRegs);
+}
+
 static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {

Modified: llvm/trunk/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp Tue Oct  4 01:59:23 2016
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCInstPrinter.h"
+#include "PPCInstrInfo.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "llvm/MC/MCExpr.h"
@@ -447,7 +448,7 @@ void PPCInstPrinter::printTLSCall(const
 /// stripRegisterPrefix - This method strips the character prefix from a
 /// register name so that only the number is left.  Used by for linux asm.
 static const char *stripRegisterPrefix(const char *RegName) {
-  if (FullRegNames)
+  if (FullRegNames || ShowVSRNumsAsVR)
     return RegName;
 
   switch (RegName[0]) {
@@ -468,15 +469,24 @@ void PPCInstPrinter::printOperand(const
                                   raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isReg()) {
-    const char *RegName = getRegisterName(Op.getReg());
-    if (ShowVSRNumsAsVR) {
-      unsigned RegNum = Op.getReg();
-      if (RegNum >= PPC::VSH0 && RegNum <= PPC::VSH31)
-        O << 'v' << RegNum - PPC::VSH0;
-      else
-        O << RegName;
-      return;
+    unsigned Reg = Op.getReg();
+
+    // There are VSX instructions that use VSX register numbering (vs0 - vs63)
+    // as well as those that use VMX register numbering (v0 - v31 which
+    // correspond to vs32 - vs63). If we have an instruction that uses VSX
+    // numbering, we need to convert the VMX registers to VSX registers.
+    // Namely, we print 32-63 when the instruction operates on one of the
+    // VMX registers.
+    // (Please synchronize with PPCAsmPrinter::printOperand)
+    if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) &&
+        !ShowVSRNumsAsVR) {
+      if (PPCInstrInfo::isVRRegister(Reg))
+        Reg = PPC::VSX32 + (Reg - PPC::V0);
+      else if (PPCInstrInfo::isVFRegister(Reg))
+        Reg = PPC::VSX32 + (Reg - PPC::VF0);
     }
+
+    const char *RegName = getRegisterName(Reg);
     // The linux and AIX assembler does not take register prefixes.
     if (!isDarwinSyntax())
       RegName = stripRegisterPrefix(RegName);

Modified: llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp Tue Oct  4 01:59:23 2016
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "PPCInstrInfo.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
 #include "llvm/ADT/Statistic.h"
@@ -350,7 +351,6 @@ get_crbitm_encoding(const MCInst &MI, un
   return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
 }
 
-
 unsigned PPCMCCodeEmitter::
 getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                   SmallVectorImpl<MCFixup> &Fixups,
@@ -361,7 +361,14 @@ getMachineOpValue(const MCInst &MI, cons
     assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
             MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
            MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
-    return CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+    unsigned Reg = MO.getReg();
+    unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg);
+
+    if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg))
+      if (PPCInstrInfo::isVRRegister(Reg))
+        Encode += 32;
+
+    return Encode;
   }
   
   assert(MO.isImm() &&

Modified: llvm/trunk/lib/Target/PowerPC/PPCAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCAsmPrinter.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCAsmPrinter.cpp Tue Oct  4 01:59:23 2016
@@ -167,7 +167,23 @@ void PPCAsmPrinter::printOperand(const M
 
   switch (MO.getType()) {
   case MachineOperand::MO_Register: {
-    const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+    unsigned Reg = MO.getReg();
+
+    // There are VSX instructions that use VSX register numbering (vs0 - vs63)
+    // as well as those that use VMX register numbering (v0 - v31 which
+    // correspond to vs32 - vs63). If we have an instruction that uses VSX
+    // numbering, we need to convert the VMX registers to VSX registers.
+    // Namely, we print 32-63 when the instruction operates on one of the
+    // VMX registers.
+    // (Please synchronize with PPCInstPrinter::printOperand)
+    if (MI->getDesc().TSFlags & PPCII::UseVSXReg) {
+      if (PPCInstrInfo::isVRRegister(Reg))
+        Reg = PPC::VSX32 + (Reg - PPC::V0);
+      else if (PPCInstrInfo::isVFRegister(Reg))
+        Reg = PPC::VSX32 + (Reg - PPC::VF0);
+    }
+    const char *RegName = PPCInstPrinter::getRegisterName(Reg);
+
     // Linux assembler (Others?) does not take register mnemonics.
     // FIXME - What about special registers used in mfspr/mtspr?
     if (!Subtarget->isDarwin())

Modified: llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td Tue Oct  4 01:59:23 2016
@@ -68,11 +68,9 @@ def RetCC_PPC : CallingConv<[
  
   // Vector types returned as "direct" go into V2 .. V9; note that only the
   // ELFv2 ABI fully utilizes all these registers.
-  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], 
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
            CCIfSubtarget<"hasAltivec()",
-           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
-  CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
-           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
 ]>;
 
 // No explicit register is specified for the AnyReg calling convention. The
@@ -121,11 +119,9 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
   CCIfType<[f64],  CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
   CCIfType<[v4f64, v4f32, v4i1],
            CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
-  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], 
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
            CCIfSubtarget<"hasAltivec()",
-           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
-  CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
-           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
 ]>;
 
 //===----------------------------------------------------------------------===//
@@ -193,12 +189,9 @@ def CC_PPC32_SVR4 : CallingConv<[
     CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
 
   // The first 12 Vector arguments are passed in AltiVec registers.
-  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], 
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
            CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
                           V8, V9, V10, V11, V12, V13]>>>,
-  CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
-           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
-                          VSH10, VSH11, VSH12, VSH13]>>>,
            
   CCDelegateTo<CC_PPC32_SVR4_Common>
 ]>;  
@@ -287,6 +280,5 @@ def CSR_64_AllRegs_Altivec : CalleeSaved
                                              (sequence "V%u", 0, 31))>;
 
 def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
-                                         (sequence "VSL%u", 0, 31),
-                                         (sequence "VSH%u", 0, 31))>;
+                                         (sequence "VSL%u", 0, 31))>;
 

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Oct  4 01:59:23 2016
@@ -685,7 +685,7 @@ PPCTargetLowering::PPCTargetLowering(con
     }
 
     if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
-      setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Legal);
+      setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
   }
 
   if (Subtarget.hasQPX()) {
@@ -1075,6 +1075,9 @@ const char *PPCTargetLowering::getTarget
   case PPCISD::STBRX:           return "PPCISD::STBRX";
   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
+  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
+  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
+  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
@@ -2986,7 +2989,7 @@ SDValue PPCTargetLowering::LowerFormalAr
           break;
         case MVT::v2f64:
         case MVT::v2i64:
-          RC = &PPC::VSHRCRegClass;
+          RC = &PPC::VRRCRegClass;
           break;
         case MVT::v4f64:
           RC = &PPC::QFRCRegClass;
@@ -3169,10 +3172,6 @@ SDValue PPCTargetLowering::LowerFormalAr
     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
-  static const MCPhysReg VSRH[] = {
-    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
-    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
-  };
 
   const unsigned Num_GPR_Regs = array_lengthof(GPR);
   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
@@ -3448,9 +3447,7 @@ SDValue PPCTargetLowering::LowerFormalAr
       // passed directly.  The latter are used to implement ELFv2 homogenous
       // vector aggregates.
       if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
-                        MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
-                        MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++VR_idx;
       } else {
@@ -5056,10 +5053,6 @@ SDValue PPCTargetLowering::LowerCall_64S
     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
-  static const MCPhysReg VSRH[] = {
-    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
-    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
-  };
 
   const unsigned NumGPRs = array_lengthof(GPR);
   const unsigned NumFPRs = 13;
@@ -5486,13 +5479,7 @@ SDValue PPCTargetLowering::LowerCall_64S
           SDValue Load =
               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
           MemOpChains.push_back(Load.getValue(1));
-
-          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
-                           Arg.getSimpleValueType() == MVT::v2i64) ?
-                          VSRH[VR_idx] : VR[VR_idx];
-          ++VR_idx;
-
-          RegsToPass.push_back(std::make_pair(VReg, Load));
+          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
         }
         ArgOffset += 16;
         for (unsigned i=0; i<16; i+=PtrByteSize) {
@@ -5510,12 +5497,7 @@ SDValue PPCTargetLowering::LowerCall_64S
 
       // Non-varargs Altivec params go into VRs or on the stack.
       if (VR_idx != NumVRs) {
-        unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
-                         Arg.getSimpleValueType() == MVT::v2i64) ?
-                        VSRH[VR_idx] : VR[VR_idx];
-        ++VR_idx;
-
-        RegsToPass.push_back(std::make_pair(VReg, Arg));
+        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
       } else {
         if (CallConv == CallingConv::Fast)
           ComputePtrOff();
@@ -7094,7 +7076,7 @@ static SDValue BuildVSLDOI(SDValue LHS,
 }
 
 static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
-  if (BVN->getValueType(0) != Type)
+  if (BVN->isConstant() || BVN->getValueType(0) != Type)
     return false;
   auto OpZero = BVN->getOperand(0);
   for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
@@ -7230,8 +7212,9 @@ SDValue PPCTargetLowering::LowerBUILD_VE
     auto OpZero = BVN->getOperand(0);
     bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
       BVN->isOnlyUserOf(OpZero.getNode());
-    if (Subtarget.isISA3_0() &&
-        isNonConstSplatBV(BVN, MVT::v4i32) && !CanLoadAndSplat)
+    if (Subtarget.isISA3_0() && !CanLoadAndSplat &&
+        (isNonConstSplatBV(BVN, MVT::v4i32) ||
+         isNonConstSplatBV(BVN, MVT::v2i64)))
       return Op;
     return SDValue();
   }
@@ -10571,6 +10554,34 @@ SDValue PPCTargetLowering::combineFPToIn
   SDLoc dl(N);
   SDValue Op(N, 0);
 
+  SDValue FirstOperand(Op.getOperand(0));
+  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
+    (FirstOperand.getValueType() == MVT::i8 ||
+     FirstOperand.getValueType() == MVT::i16);
+  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
+    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+    bool DstDouble = Op.getValueType() == MVT::f64;
+    unsigned ConvOp = Signed ?
+      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
+      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
+    SDValue WidthConst =
+      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
+                            dl, false);
+    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
+    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
+    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
+                                         DAG.getVTList(MVT::f64, MVT::Other),
+                                         Ops, MVT::i8, LDN->getMemOperand());
+
+    // For signed conversion, we need to sign-extend the value in the VSR
+    if (Signed) {
+      SDValue ExtOps[] = { Ld, WidthConst };
+      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
+      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
+    } else
+      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
+  }
+
   // Don't handle ppc_fp128 here or i1 conversions.
   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
     return SDValue();
@@ -10783,10 +10794,14 @@ SDValue PPCTargetLowering::PerformDAGCom
   case ISD::UINT_TO_FP:
     return combineFPToIntToFP(N, DCI);
   case ISD::STORE: {
+    EVT Op1VT = N->getOperand(1).getValueType();
+    bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
+      (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
+
     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
     if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
-        N->getOperand(1).getValueType() == MVT::i32 &&
+        ValidTypeForStoreFltAsInt &&
         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
       SDValue Val = N->getOperand(1).getOperand(0);
       if (Val.getValueType() == MVT::f32) {
@@ -10796,15 +10811,31 @@ SDValue PPCTargetLowering::PerformDAGCom
       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
       DCI.AddToWorklist(Val.getNode());
 
-      SDValue Ops[] = {
-        N->getOperand(0), Val, N->getOperand(2),
-        DAG.getValueType(N->getOperand(1).getValueType())
-      };
+      if (Op1VT == MVT::i32) {
+        SDValue Ops[] = {
+          N->getOperand(0), Val, N->getOperand(2),
+          DAG.getValueType(N->getOperand(1).getValueType())
+        };
+
+        Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+                DAG.getVTList(MVT::Other), Ops,
+                cast<StoreSDNode>(N)->getMemoryVT(),
+                cast<StoreSDNode>(N)->getMemOperand());
+      } else {
+        unsigned WidthInBytes =
+          N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
+        SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
+
+        SDValue Ops[] = {
+          N->getOperand(0), Val, N->getOperand(2), WidthConst,
+          DAG.getValueType(N->getOperand(1).getValueType())
+        };
+        Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
+                                      DAG.getVTList(MVT::Other), Ops,
+                                      cast<StoreSDNode>(N)->getMemoryVT(),
+                                      cast<StoreSDNode>(N)->getMemOperand());
+      }
 
-      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
-              DAG.getVTList(MVT::Other), Ops,
-              cast<StoreSDNode>(N)->getMemoryVT(),
-              cast<StoreSDNode>(N)->getMemOperand());
       DCI.AddToWorklist(Val.getNode());
       return Val;
     }

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Oct  4 01:59:23 2016
@@ -50,6 +50,10 @@ namespace llvm {
       /// unsigned integers.
       FCTIDUZ, FCTIWUZ,
 
+      /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
+      /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
+      VEXTS,
+
       /// Reciprocal estimate instructions (unary FP ops).
       FRE, FRSQRTE,
 
@@ -365,6 +369,16 @@ namespace llvm {
       /// destination 64-bit register.
       LFIWZX,
 
+      /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
+      /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
+      /// This can be used for converting loaded integers to floating point.
+      LXSIZX,
+
+      /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
+      /// chain, then an f64 value to store, then an address to store it to,
+      /// followed by a byte-width for the store.
+      STXSIX,
+
       /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
       /// Maps directly to an lxvd2x instruction that will be followed by
       /// an xxswapd.

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td Tue Oct  4 01:59:23 2016
@@ -706,6 +706,12 @@ def VSPLTW : VXForm_1<652, (outs vrrc:$v
                       "vspltw $vD, $vB, $UIMM", IIC_VecPerm,
                       [(set v16i8:$vD, 
                         (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+let isCodeGenOnly = 1 in {
+  def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
+                         "vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>;
+  def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
+                         "vsplth $vD, $vB, $UIMM", IIC_VecPerm, []>;
+}
 
 def VSR    : VX1_Int_Ty< 708, "vsr"  , int_ppc_altivec_vsr,  v4i32>;
 def VSRO   : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
@@ -1270,6 +1276,9 @@ def VINSERTD : VX1_VT5_UIM5_VB5<973, "vi
 class VX_VT5_EO5_VB5<bits<11> xo, bits<5> eo, string opc, list<dag> pattern>
   : VXForm_RD5_XO5_RS5<xo, eo, (outs vrrc:$vD), (ins vrrc:$vB),
                        !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
+class VX_VT5_EO5_VB5s<bits<11> xo, bits<5> eo, string opc, list<dag> pattern>
+  : VXForm_RD5_XO5_RS5<xo, eo, (outs vfrc:$vD), (ins vfrc:$vB),
+                       !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
 
 // Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]
 def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs g8rc:$rD), (ins vrrc:$vB),
@@ -1292,6 +1301,13 @@ def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17,
 def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>;
 def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>;
 def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>;
+let isCodeGenOnly = 1 in {
+  def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
+  def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;
+  def VEXTSB2Ds : VX_VT5_EO5_VB5s<1538, 24, "vextsb2d", []>;
+  def VEXTSH2Ds : VX_VT5_EO5_VB5s<1538, 25, "vextsh2d", []>;
+  def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>;
+}
 
 // Vector Integer Negate
 def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw", []>;

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td Tue Oct  4 01:59:23 2016
@@ -38,6 +38,14 @@ class I<bits<6> opcode, dag OOL, dag IOL
   let TSFlags{2}   = PPC970_Cracked;
   let TSFlags{5-3} = PPC970_Unit;
 
+  /// Indicate that the VSX instruction is to use VSX numbering/encoding.
+  /// Since ISA 3.0, there are scalar instructions that use the upper
+  /// half of the VSX register set only. Rather than adding further complexity
+  /// to the register class set, the VSX registers just include the Altivec
+  /// registers and this flag decides the numbering to be used for them.
+  bits<1> UseVSXReg = 0;
+  let TSFlags{6}   = UseVSXReg;
+
   // Fields used for relation models.
   string BaseName = "";
 
@@ -62,6 +70,8 @@ class PPC970_Unit_VALU     { bits<3> PPC
 class PPC970_Unit_VPERM    { bits<3> PPC970_Unit = 6;   }
 class PPC970_Unit_BRU      { bits<3> PPC970_Unit = 7;   }
 
+class UseVSXReg { bits<1> UseVSXReg = 1; }
+
 // Two joined instructions; used to emit two adjacent instructions as one.
 // The itinerary from the first instruction is used for scheduling and
 // classification.

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Tue Oct  4 01:59:23 2016
@@ -859,15 +859,6 @@ void PPCInstrInfo::copyPhysReg(MachineBa
       llvm_unreachable("nop VSX copy");
 
     DestReg = SuperReg;
-  } else if (PPC::VRRCRegClass.contains(DestReg) &&
-             PPC::VSRCRegClass.contains(SrcReg)) {
-    unsigned SuperReg =
-      TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
-
-    if (VSXSelfCopyCrash && SrcReg == SuperReg)
-      llvm_unreachable("nop VSX copy");
-
-    DestReg = SuperReg;
   } else if (PPC::F8RCRegClass.contains(SrcReg) &&
              PPC::VSRCRegClass.contains(DestReg)) {
     unsigned SuperReg =
@@ -877,15 +868,6 @@ void PPCInstrInfo::copyPhysReg(MachineBa
       llvm_unreachable("nop VSX copy");
 
     SrcReg = SuperReg;
-  } else if (PPC::VRRCRegClass.contains(SrcReg) &&
-             PPC::VSRCRegClass.contains(DestReg)) {
-    unsigned SuperReg =
-      TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
-
-    if (VSXSelfCopyCrash && DestReg == SuperReg)
-      llvm_unreachable("nop VSX copy");
-
-    SrcReg = SuperReg;
   }
 
   // Different class register copy
@@ -1073,6 +1055,15 @@ PPCInstrInfo::storeRegToStackSlot(Machin
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   FuncInfo->setHasSpills();
 
+  // We need to avoid a situation in which the value from a VRRC register is
+  // spilled using an Altivec instruction and reloaded into a VSRC register
+  // using a VSX instruction. The issue with this is that the VSX
+  // load/store instructions swap the doublewords in the vector and the Altivec
+  // ones don't. The register classes on the spill/reload may be different if
+  // the register is defined using an Altivec instruction and is then used by a
+  // VSX instruction.
+  RC = updatedRC(RC);
+
   bool NonRI = false, SpillsVRS = false;
   if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
                           NonRI, SpillsVRS))
@@ -1185,6 +1176,16 @@ PPCInstrInfo::loadRegFromStackSlot(Machi
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   FuncInfo->setHasSpills();
 
+  // We need to avoid a situation in which the value from a VRRC register is
+  // spilled using an Altivec instruction and reloaded into a VSRC register
+  // using a VSX instruction. The issue with this is that the VSX
+  // load/store instructions swap the doublewords in the vector and the Altivec
+  // ones don't. The register classes on the spill/reload may be different if
+  // the register is defined using an Altivec instruction and is then used by a
+  // VSX instruction.
+  if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
+    RC = &PPC::VSRCRegClass;
+
   bool NonRI = false, SpillsVRS = false;
   if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
                            NonRI, SpillsVRS))
@@ -1884,3 +1885,10 @@ bool PPCInstrInfo::expandPostRAPseudo(Ma
   }
   return false;
 }
+
+const TargetRegisterClass *
+PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
+  if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
+    return &PPC::VSRCRegClass;
+  return RC;
+}

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h Tue Oct  4 01:59:23 2016
@@ -61,6 +61,15 @@ enum PPC970_Unit {
   PPC970_VPERM  = 6 << PPC970_Shift,   // Vector Permute Unit
   PPC970_BRU    = 7 << PPC970_Shift    // Branch Unit
 };
+
+enum {
+  /// Shift count to bypass PPC970 flags
+  NewDef_Shift = 6,
+
+  /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX
+  /// register (v0-v31).
+  UseVSXReg = 0x1 << NewDef_Shift
+};
 } // end namespace PPCII
 
 class PPCSubtarget;
@@ -273,6 +282,14 @@ public:
 
   // Lower pseudo instructions after register allocation.
   bool expandPostRAPseudo(MachineInstr &MI) const override;
+
+  static bool isVFRegister(unsigned Reg) {
+    return Reg >= PPC::VF0 && Reg <= PPC::VF31;
+  }
+  static bool isVRRegister(unsigned Reg) {
+    return Reg >= PPC::V0 && Reg <= PPC::V31;
+  }
+  const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
 };
 
 }

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue Oct  4 01:59:23 2016
@@ -23,6 +23,15 @@ def SDT_PPCstfiwx : SDTypeProfile<0, 2,
 def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
   SDTCisVT<0, f64>, SDTCisPtrTy<1>
 ]>;
+def SDT_PPCLxsizx : SDTypeProfile<1, 2, [
+  SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+def SDT_PPCstxsix : SDTypeProfile<0, 3, [
+  SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+def SDT_PPCVexts  : SDTypeProfile<1, 2, [
+  SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
+]>;
 
 def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
 def SDT_PPCCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
@@ -108,6 +117,11 @@ def PPClfiwax : SDNode<"PPCISD::LFIWAX",
                        [SDNPHasChain, SDNPMayLoad]>;
 def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
                        [SDNPHasChain, SDNPMayLoad]>;
+def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
+                       [SDNPHasChain, SDNPMayLoad]>;
+def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
+                       [SDNPHasChain, SDNPMayStore]>;
+def PPCVexts  : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
 
 // Extract FPSCR (not modeled at the DAG level).
 def PPCmffs   : SDNode<"PPCISD::MFFS",
@@ -445,6 +459,12 @@ def PPCRegVRRCAsmOperand : AsmOperandCla
 def vrrc : RegisterOperand<VRRC> {
   let ParserMatchClass = PPCRegVRRCAsmOperand;
 }
+def PPCRegVFRCAsmOperand : AsmOperandClass {
+  let Name = "RegVFRC"; let PredicateMethod = "isRegNumber";
+}
+def vfrc : RegisterOperand<VFRC> {
+  let ParserMatchClass = PPCRegVFRCAsmOperand;
+}
 def PPCRegCRBITRCAsmOperand : AsmOperandClass {
   let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
 }

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Tue Oct  4 01:59:23 2016
@@ -89,6 +89,22 @@ multiclass XX3Form_Rcr<bits<6> opcode, b
   }
 }
 
+// Instruction form with a single input register for instructions such as
+// XXPERMDI. The reason for defining this is that specifying multiple chained
+// operands (such as loads) to an instruction will perform both chained
+// operations rather than coalescing them into a single register - even though
+// the source memory location is the same. This simply forces the instruction
+// to use the same register for both inputs.
+// For example, an output DAG such as this:
+//   (XXPERMDI (LXSIBZX xoaddr:$src), (LXSIBZX xoaddr:$src ), 0))
+// would result in two load instructions emitted and used as separate inputs
+// to the XXPERMDI instruction.
+class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+                 InstrItinClass itin, list<dag> pattern>
+  : XX3Form_2<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+    let XB = XA;
+}
+
 def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
 def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
 def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
@@ -96,6 +112,7 @@ def HasOnlySwappingMemOps : Predicate<"!
 
 let Predicates = [HasVSX] in {
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let UseVSXReg = 1 in {
 let hasSideEffects = 0 in { // VSX instructions don't have side effects.
 let Uses = [RM] in {
 
@@ -783,6 +800,9 @@ let Uses = [RM] in {
   def XXPERMDI : XX3Form_2<60, 10,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
                        "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+  let isCodeGenOnly = 1 in
+  def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA, u2imm:$DM),
+                             "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
   def XXSEL : XX4Form<60, 3,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
                       "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
@@ -797,7 +817,12 @@ let Uses = [RM] in {
                        "xxspltw $XT, $XB, $UIM", IIC_VecPerm,
                        [(set v4i32:$XT,
                              (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
+  let isCodeGenOnly = 1 in
+  def XXSPLTWs : XX2Form_2<60, 164,
+                       (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
+                       "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
 } // hasSideEffects
+} // UseVSXReg = 1
 
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
 // instruction selection into a branch sequence.
@@ -849,6 +874,12 @@ def : InstAlias<"xxmrgld $XT, $XA, $XB",
                 (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
 def : InstAlias<"xxswapd $XT, $XB",
                 (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
+def : InstAlias<"xxspltd $XT, $XB, 0",
+                (XXPERMDIs vsrc:$XT, vfrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+                (XXPERMDIs vsrc:$XT, vfrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+                (XXPERMDIs vsrc:$XT, vfrc:$XB, 2)>;
 
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
 
@@ -1071,6 +1102,22 @@ def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i6
 } // AddedComplexity
 } // HasVSX
 
+def ScalarLoads {
+  dag Li8 =       (i32 (extloadi8 xoaddr:$src));
+  dag ZELi8 =     (i32 (zextloadi8 xoaddr:$src));
+  dag ZELi8i64 =  (i64 (zextloadi8 xoaddr:$src));
+  dag SELi8 =     (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
+  dag SELi8i64 =  (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
+
+  dag Li16 =      (i32 (extloadi16 xoaddr:$src));
+  dag ZELi16 =    (i32 (zextloadi16 xoaddr:$src));
+  dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
+  dag SELi16 =    (i32 (sextloadi16 xoaddr:$src));
+  dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
+
+  dag Li32 = (i32 (load xoaddr:$src));
+}
+
 // The following VSX instructions were introduced in Power ISA 2.07
 /* FIXME: if the operands are v2i64, these patterns will not match.
    we should define new patterns or otherwise match the same patterns
@@ -1080,7 +1127,7 @@ def HasP8Vector : Predicate<"PPCSubTarge
 def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
 let Predicates = [HasP8Vector] in {
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-  let isCommutable = 1 in {
+  let isCommutable = 1, UseVSXReg = 1 in {
     def XXLEQV : XX3Form<60, 186,
                          (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                          "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
@@ -1090,11 +1137,12 @@ let AddedComplexity = 400 in { // Prefer
                           "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
                           [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
                                                     v4i32:$XB)))]>;
-  } // isCommutable
+  } // isCommutable, UseVSXReg
 
   def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
             (XXLEQV $A, $B)>;
 
+  let UseVSXReg = 1 in {
   def XXLORC : XX3Form<60, 170,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                        "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
@@ -1122,6 +1170,7 @@ let AddedComplexity = 400 in { // Prefer
                           "stxsiwx $XT, $dst", IIC_LdStSTFD,
                           [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
   } // mayStore
+  } // UseVSXReg = 1
 
   def : Pat<(f64 (extloadf32 xoaddr:$src)),
             (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
@@ -1149,6 +1198,7 @@ let AddedComplexity = 400 in { // Prefer
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
             (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
 
+  let UseVSXReg = 1 in {
   // VSX Elementary Scalar FP arithmetic (SP)
   let isCommutable = 1 in {
     def XSADDSP : XX3Form<60, 0,
@@ -1273,6 +1323,7 @@ let AddedComplexity = 400 in { // Prefer
                           "xscvdpspn $XT, $XB", IIC_VecFP, []>;
   def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
                           "xscvspdpn $XT, $XB", IIC_VecFP, []>;
+  } // UseVSXReg = 1
 
   let Predicates = [IsLittleEndian] in {
   def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
@@ -1295,9 +1346,12 @@ let AddedComplexity = 400 in { // Prefer
   def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
             (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
   }
+  def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
+            (v4i32 (XXSPLTWs (LXSIWAX xoaddr:$src), 1))>;
 } // AddedComplexity = 400
 } // HasP8Vector
 
+let UseVSXReg = 1 in {
 let Predicates = [HasDirectMove] in {
   // VSX direct move instructions
   def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
@@ -1332,6 +1386,7 @@ let Predicates = [IsISA3_0, HasDirectMov
                               []>, Requires<[In64BitMode]>;
 
 } // IsISA3_0, HasDirectMove
+} // UseVSXReg = 1
 
 /*  Direct moves of various widths from GPR's into VSR's. Each move lines
     the value up into element 0 (both BE and LE). Namely, entities smaller than
@@ -1911,6 +1966,7 @@ let AddedComplexity = 400, Predicates =
     : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
                     !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
 
+  let UseVSXReg = 1 in {
   // [PO T XO B XO BX /]
   class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
                         list<dag> pattern>
@@ -1929,6 +1985,7 @@ let AddedComplexity = 400, Predicates =
                   InstrItinClass itin, list<dag> pattern>
     : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
               !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
+  } // UseVSXReg = 1
 
   // [PO VRT VRA VRB XO /]
   class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
@@ -1997,7 +2054,8 @@ let AddedComplexity = 400, Predicates =
   // DP/QP Compare Exponents
   def XSCMPEXPDP : XX3Form_1<60, 59,
                              (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
-                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
+                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>,
+                   UseVSXReg;
   def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
 
   // DP Compare ==, >=, >, !=
@@ -2011,6 +2069,7 @@ let AddedComplexity = 400, Predicates =
                                   IIC_FPCompare, []>;
   def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc,
                                   IIC_FPCompare, []>;
+  let UseVSXReg = 1 in {
   // Vector Compare Not Equal
   def XVCMPNEDP  : XX3Form_Rc<60, 123,
                               (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
@@ -2028,12 +2087,13 @@ let AddedComplexity = 400, Predicates =
                               (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                               "xvcmpnesp. $XT, $XA, $XB", IIC_VecFPCompare, []>,
                               isDOT;
+  } // UseVSXReg = 1
 
   //===--------------------------------------------------------------------===//
   // Quad-Precision Floating-Point Conversion Instructions:
 
   // Convert DP -> QP
-  def XSCVDPQP  : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>;
+  def XSCVDPQP  : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, []>;
 
   // Round & Convert QP -> DP (dword[1] is set to zero)
   def XSCVQPDP  : X_VT5_XO5_VB5   <63, 20, 836, "xscvqpdp" , []>;
@@ -2046,8 +2106,8 @@ let AddedComplexity = 400, Predicates =
   def XSCVQPUWZ : X_VT5_XO5_VB5<63,  1, 836, "xscvqpuwz", []>;
 
   // Convert (Un)Signed DWord -> QP
-  def XSCVSDQP  : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>;
-  def XSCVUDQP  : X_VT5_XO5_VB5_TyVB<63,  2, 836, "xscvudqp", vsfrc, []>;
+  def XSCVSDQP  : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
+  def XSCVUDQP  : X_VT5_XO5_VB5_TyVB<63,  2, 836, "xscvudqp", vfrc, []>;
 
   //===--------------------------------------------------------------------===//
   // Round to Floating-Point Integer Instructions
@@ -2084,7 +2144,7 @@ let AddedComplexity = 400, Predicates =
   // Insert Exponent DP/QP
   // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
   def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
-                          "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
+                          "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg;
   // vB NOTE: only vB.dword[0] is used, that's why we don't use
   //          X_VT5_VA5_VB5 form
   def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
@@ -2093,10 +2153,12 @@ let AddedComplexity = 400, Predicates =
   // Extract Exponent/Significand DP/QP
   def XSXEXPDP : XX2_RT5_XO5_XB6<60,  0, 347, "xsxexpdp", []>;
   def XSXSIGDP : XX2_RT5_XO5_XB6<60,  1, 347, "xsxsigdp", []>;
+
   def XSXEXPQP : X_VT5_XO5_VB5  <63,  2, 804, "xsxexpqp", []>;
   def XSXSIGQP : X_VT5_XO5_VB5  <63, 18, 804, "xsxsigqp", []>;
 
   // Vector Insert Word
+  let UseVSXReg = 1 in {
   // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
   def XXINSERTW   :
     XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
@@ -2110,6 +2172,7 @@ let AddedComplexity = 400, Predicates =
   def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
                                   (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
                                   "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
+  } // UseVSXReg = 1
 
   // Vector Insert Exponent DP/SP
   def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
@@ -2126,23 +2189,27 @@ let AddedComplexity = 400, Predicates =
   //===--------------------------------------------------------------------===//
 
   // Test Data Class SP/DP/QP
+  let UseVSXReg = 1 in {
   def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
                               "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
   def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
                               "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
+  } // UseVSXReg = 1
   def XSTSTDCQP : X_BF3_DCMX7_RS5  <63, 708,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
                               "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
 
   // Vector Test Data Class SP/DP
+  let UseVSXReg = 1 in {
   def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
                               (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
                               "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, []>;
   def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
                               (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
                               "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, []>;
+  } // UseVSXReg = 1
 
   //===--------------------------------------------------------------------===//
 
@@ -2173,7 +2240,7 @@ let AddedComplexity = 400, Predicates =
 
   // Vector Splat Immediate Byte
   def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
-                            "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
+                            "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg;
 
   //===--------------------------------------------------------------------===//
   // Vector/Scalar Load/Store Instructions
@@ -2181,12 +2248,12 @@ let AddedComplexity = 400, Predicates =
   let mayLoad = 1 in {
   // Load Vector
   def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
-                            "lxv $XT, $src", IIC_LdStLFD, []>;
+                            "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
   // Load DWord
-  def LXSD  : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src),
+  def LXSD  : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
                        "lxsd $vD, $src", IIC_LdStLFD, []>;
   // Load SP from src, convert it to DP, and place in dword[0]
-  def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src),
+  def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
                        "lxssp $vD, $src", IIC_LdStLFD, []>;
 
   // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
@@ -2194,11 +2261,13 @@ let AddedComplexity = 400, Predicates =
   class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
                       RegisterOperand vtype, list<dag> pattern>
     : XX1Form<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
-              !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
+              !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg;
 
   // Load as Integer Byte/Halfword & Zero Indexed
-  def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, []>;
-  def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, []>;
+  def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
+                              [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
+  def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
+                              [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>;
 
   // Load Vector Halfword*8/Byte*16 Indexed
   def LXVH8X  : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
@@ -2214,28 +2283,34 @@ let AddedComplexity = 400, Predicates =
 
   // Load Vector Word & Splat Indexed
   def LXVWSX  : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
-  } // end mayLoad
+  } // mayLoad
 
   let mayStore = 1 in {
   // Store Vector
   def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
-                             "stxv $XT, $dst", IIC_LdStSTFD, []>;
+                             "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
   // Store DWord
-  def STXSD  : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst),
+  def STXSD  : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
                         "stxsd $vS, $dst", IIC_LdStSTFD, []>;
   // Convert DP of dword[0] to SP, and Store to dst
-  def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst),
+  def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
                         "stxssp $vS, $dst", IIC_LdStSTFD, []>;
 
   // [PO S RA RB XO SX]
   class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
                       RegisterOperand vtype, list<dag> pattern>
     : XX1Form<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
-              !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
+              !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg;
 
   // Store as Integer Byte/Halfword Indexed
-  def STXSIBX  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vsfrc, []>;
-  def STXSIHX  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vsfrc, []>;
+  def STXSIBX  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vsfrc,
+                               [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
+  def STXSIHX  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vsfrc,
+                               [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
+  let isCodeGenOnly = 1 in {
+    def STXSIBXv  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vrrc, []>;
+    def STXSIHXv  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vrrc, []>;
+  }
 
   // Store Vector Halfword*8/Byte*16 Indexed
   def STXVH8X  : X_XS6_RA5_RB5<31,  940, "stxvh8x" , vsrc, []>;
@@ -2248,7 +2323,7 @@ let AddedComplexity = 400, Predicates =
   // Store Vector (Left-justified) with Length
   def STXVL    : X_XS6_RA5_RB5<31,  397, "stxvl"   , vsrc, []>;
   def STXVLL   : X_XS6_RA5_RB5<31,  429, "stxvll"  , vsrc, []>;
-  } // end mayStore
+  } // mayStore
 
   // Patterns for which instructions from ISA 3.0 are a better match
   let Predicates = [IsLittleEndian, HasP9Vector] in {
@@ -2341,6 +2416,146 @@ let AddedComplexity = 400, Predicates =
             (v4i32 (XXSPLTIB 255))>;
   def : Pat<(v2i64 immAllOnesV),
             (v2i64 (XXSPLTIB 255))>;
+
+  // Build vectors from i8 loads
+  def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
+            (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
+  def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
+            (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
+  def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
+           (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
+  def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
+            (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
+  def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
+            (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
+  def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
+            (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
+
+  // Build vectors from i16 loads
+  def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
+            (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
+  def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
+            (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
+  def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
+           (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
+  def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
+            (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
+  def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
+            (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
+
+  let Predicates = [IsBigEndian, HasP9Vector] in {
+  // Scalar stores of i8
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+            (STXSIBXv $S, xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
+
+  // Scalar stores of i16
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+            (STXSIHXv $S, xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
+  } // IsBigEndian, HasP9Vector
+
+  let Predicates = [IsLittleEndian, HasP9Vector] in {
+  // Scalar stores of i8
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+            (STXSIBXv $S, xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+            (STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>;
+
+  // Scalar stores of i16
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+            (STXSIHXv $S, xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+            (STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
+  } // IsLittleEndian, HasP9Vector
+
+  // Vector sign extensions
+  def : Pat<(f64 (PPCVexts f64:$A, 1)),
+            (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
+  def : Pat<(f64 (PPCVexts f64:$A, 2)),
+            (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
 } // end HasP9Vector, AddedComplexity
 
 let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {

Modified: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp Tue Oct  4 01:59:23 2016
@@ -170,10 +170,67 @@ bool PPCMIPeephole::simplifyCode(void) {
                 ToErase = &MI;
                 Simplified = true;
               }
+            } else if ((Immed == 0 || Immed == 3) &&
+                       DefMI && DefMI->getOpcode() == PPC::XXPERMDIs) {
+              // Splat fed by another splat - switch the output of the first
+              // and remove the second.
+              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+              ToErase = &MI;
+              Simplified = true;
+              DEBUG(dbgs() << "Removing redundant splat: ");
+              DEBUG(MI.dump());
             }
           }
         }
         break;
+      }
+      case PPC::VSPLTB:
+      case PPC::VSPLTH:
+      case PPC::XXSPLTW: {
+        unsigned MyOpcode = MI.getOpcode();
+        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
+        unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
+        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
+        if (!DefMI)
+          break;
+        unsigned DefOpcode = DefMI->getOpcode();
+        bool SameOpcode = (MyOpcode == DefOpcode) ||
+          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
+          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
+          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs);
+        // Splat fed by another splat - switch the output of the first
+        // and remove the second.
+        if (SameOpcode) {
+          DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+          ToErase = &MI;
+          Simplified = true;
+          DEBUG(dbgs() << "Removing redundant splat: ");
+          DEBUG(MI.dump());
+        }
+        // Splat fed by a shift. Usually when we align value to splat into
+        // vector element zero.
+        if (DefOpcode == PPC::XXSLDWI) {
+          unsigned ShiftRes = DefMI->getOperand(0).getReg();
+          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
+          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
+          unsigned ShiftImm = DefMI->getOperand(3).getImm();
+          unsigned SplatImm = MI.getOperand(2).getImm();
+          if (ShiftOp1 == ShiftOp2) {
+            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
+            if (MRI->hasOneNonDBGUse(ShiftRes)) {
+              DEBUG(dbgs() << "Removing redundant shift: ");
+              DEBUG(DefMI->dump());
+              ToErase = DefMI;
+            }
+            Simplified = true;
+            DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
+                  " to " << NewElem << " in instruction: ");
+            DEBUG(MI.dump());
+            MI.getOperand(1).setReg(ShiftOp1);
+            MI.getOperand(2).setImm(NewElem);
+          }
+        }
+        break;
       }
       }
     }

Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Tue Oct  4 01:59:23 2016
@@ -303,7 +303,6 @@ unsigned PPCRegisterInfo::getRegPressure
   case PPC::VRRCRegClassID:
   case PPC::VFRCRegClassID:
   case PPC::VSLRCRegClassID:
-  case PPC::VSHRCRegClassID:
     return 32 - DefaultSafety;
   case PPC::VSRCRegClassID:
   case PPC::VSFRCRegClassID:

Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.td?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.td Tue Oct  4 01:59:23 2016
@@ -17,7 +17,6 @@ def sub_eq : SubRegIndex<1, 2>;
 def sub_un : SubRegIndex<1, 3>;
 def sub_32 : SubRegIndex<32>;
 def sub_64 : SubRegIndex<64>;
-def sub_128 : SubRegIndex<128>;
 }
 
 
@@ -79,15 +78,6 @@ class VSRL<FPR SubReg, string n> : PPCRe
   let SubRegIndices = [sub_64];
 }
 
-// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
-// registers.
-class VSRH<VR SubReg, string n> : PPCReg<n> {
-  let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
-  let HWEncoding{5} = 1;
-  let SubRegs = [SubReg];
-  let SubRegIndices = [sub_128];
-}
-
 // CR - One of the 8 4-bit condition registers
 class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
   let HWEncoding{2-0} = num;
@@ -116,9 +106,12 @@ foreach Index = 0-31 in {
                 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
 }
 
-// Floating-point vector subregisters (for VSX)
+// 64-bit Floating-point subregisters of Altivec registers
+// Note: the register names are v0-v31 or vs32-vs63 depending on the use.
+//       Custom C++ code is used to produce the correct name and encoding.
 foreach Index = 0-31 in {
-  def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+  def VF#Index : VF<Index, "v" #Index>,
+                 DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
 }
 
 // QPX Floating-point registers
@@ -138,9 +131,11 @@ foreach Index = 0-31 in {
   def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
                   DwarfRegAlias<!cast<FPR>("F"#Index)>;
 }
-foreach Index = 0-31 in {
-  def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
-                  DwarfRegAlias<!cast<VR>("V"#Index)>;
+
+// Dummy VSX registers. These define the strings "vs32"-"vs63" and are only
+// used for asm printing.
+foreach Index = 32-63 in {
+  def VSX#Index : PPCReg<"vs"#Index>;
 }
 
 // The representation of r0 when treated as the constant 0.
@@ -288,7 +283,7 @@ def F8RC : RegisterClass<"PPC", [f64], 6
                                                 (sequence "F%u", 31, 14))>;
 def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
 
-def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64], 128,
                          (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
                              V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
                              V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
@@ -298,14 +293,8 @@ def VRRC : RegisterClass<"PPC", [v16i8,v
 def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
                           (add (sequence "VSL%u", 0, 13),
                                (sequence "VSL%u", 31, 14))>;
-def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
-                          (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
-			       VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
-                               VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
-                               VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
-                               VSH22, VSH21, VSH20)>;
 def VSRC  : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
-                          (add VSLRC, VSHRC)>;
+                          (add VSLRC, VRRC)>;
 
 // Register classes for the 64-bit "scalar" VSX subregisters.
 def VFRC :  RegisterClass<"PPC", [f64], 64,

Modified: llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp Tue Oct  4 01:59:23 2016
@@ -101,11 +101,8 @@ protected:
           // This is a copy *to* a VSX register from a non-VSX register.
           Changed = true;
 
-          const TargetRegisterClass *SrcRC =
-            IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
-                                           &PPC::VSLRCRegClass;
+          const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass;
           assert((IsF8Reg(SrcMO.getReg(), MRI) ||
-                  IsVRReg(SrcMO.getReg(), MRI) ||
                   IsVSSReg(SrcMO.getReg(), MRI) ||
                   IsVSFReg(SrcMO.getReg(), MRI)) &&
                  "Unknown source for a VSX copy");
@@ -116,8 +113,7 @@ protected:
               .addImm(1) // add 1, not 0, because there is no implicit clearing
                          // of the high bits.
               .addOperand(SrcMO)
-              .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128
-                                                   : PPC::sub_64);
+              .addImm(PPC::sub_64);
 
           // The source of the original copy is now the new virtual register.
           SrcMO.setReg(NewVReg);
@@ -126,13 +122,10 @@ protected:
           // This is a copy *from* a VSX register to a non-VSX register.
           Changed = true;
 
-          const TargetRegisterClass *DstRC =
-            IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
-                                           &PPC::VSLRCRegClass;
+          const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass;
           assert((IsF8Reg(DstMO.getReg(), MRI) ||
                   IsVSFReg(DstMO.getReg(), MRI) ||
-                  IsVSSReg(DstMO.getReg(), MRI) ||
-                  IsVRReg(DstMO.getReg(), MRI)) &&
+                  IsVSSReg(DstMO.getReg(), MRI)) &&
                  "Unknown destination for a VSX copy");
 
           // Copy the VSX value into a new VSX register of the correct subclass.
@@ -143,8 +136,7 @@ protected:
 
           // Transform the original copy into a subregister extraction copy.
           SrcMO.setReg(NewVReg);
-          SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
-                                                         PPC::sub_64);
+          SrcMO.setSubReg(PPC::sub_64);
         }
       }
 

Modified: llvm/trunk/test/CodeGen/PowerPC/inline-asm-scalar-to-vector-error.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/inline-asm-scalar-to-vector-error.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/inline-asm-scalar-to-vector-error.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/inline-asm-scalar-to-vector-error.ll Tue Oct  4 01:59:23 2016
@@ -7,8 +7,5 @@ define hidden void @f(i32 %x) {
   ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type
   tail call void asm sideeffect "nop", "{vsl1}"(i32 %x) nounwind
 
-  ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type
-  tail call void asm sideeffect "nop", "{vsh1}"(i32 %x) nounwind
-
   ret void
 }

Modified: llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll Tue Oct  4 01:59:23 2016
@@ -1,8 +1,7 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
 ; RUN:   -implicit-check-not vmrg -implicit-check-not=vperm %s
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \
-; RUN:   -implicit-check-not vmrg -implicit-check-not=vperm %s \
-; RUN:   --check-prefix=CHECK-BE
+; RUN:   -implicit-check-not vmrg -implicit-check-not=vperm %s
 
 define <16 x i8> @test(i32* %s, i32* %t) {
 entry:
@@ -11,13 +10,6 @@ entry:
   %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   ret <16 x i8> %2
 ; CHECK-LABEL: test
-; CHECK: lwz [[GPR:[0-9]+]], 0(3)
-; CHECK: mtvsrd [[VSR:[0-9]+]], [[GPR]]
-; CHECK: xxswapd  [[SWP:[0-9]+]], [[VSR]]
-; CHECK: xxspltw 34, [[SWP]], 3
-; CHECK-BE-LABEL: test
-; CHECK-BE: lwz [[GPR:[0-9]+]], 0(3)
-; CHECK-BE: sldi [[SHL:[0-9]+]], [[GPR]], 32
-; CHECK-BE: mtvsrd [[VSR:[0-9]+]], [[SHL]]
-; CHECK-BE: xxspltw 34, [[VSR]], 0
+; CHECK: lxsiwax 34, 0, 3
+; CHECK: xxspltw 34, 34, 1
 }

Modified: llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll Tue Oct  4 01:59:23 2016
@@ -98,7 +98,6 @@ define <4 x float> @vector_reassociate_a
 ; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
 ; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
-; CHECK-PWR:       # kill
 ; CHECK-NEXT:  blr
 
   %t0 = fadd <4 x float> %x0, %x1
@@ -116,7 +115,6 @@ define <4 x float> @vector_reassociate_a
 ; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
 ; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
-; CHECK-PWR:       # kill
 ; CHECK-NEXT:  blr
 
   %t0 = fadd <4 x float> %x0, %x1
@@ -134,7 +132,6 @@ define <4 x float> @vector_reassociate_a
 ; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
 ; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
-; CHECK-PWR:       # kill
 ; CHECK-NEXT:  blr
 
   %t0 = fadd <4 x float> %x0, %x1
@@ -152,7 +149,6 @@ define <4 x float> @vector_reassociate_a
 ; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
 ; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
-; CHECK-PWR:       # kill
 ; CHECK-NEXT:  blr
 
   %t0 = fadd <4 x float> %x0, %x1

Modified: llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll Tue Oct  4 01:59:23 2016
@@ -63,7 +63,7 @@ entry:
   ret <2 x i64> %splat.splat
 ; CHECK: mtvsrd {{[0-9]+}}, 3
 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: xxspltd [[REG1]], [[REG1]], 0
+; CHECK-LE: xxspltd 34, [[REG1]], 0
 }
 
 ; Function Attrs: nounwind
@@ -75,9 +75,10 @@ entry:
   %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
   %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
   ret <4 x float> %splat.splat
-; CHECK: xscvdpspn {{[0-9]+}}, 1
+; CHECK: xscvdpspn [[REG1:[0-9]+]], 1
+; CHECK: xxspltw 34, [[REG1]]
 ; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1
-; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1
+; CHECK-LE: xxspltw 34, [[REG1]]
 }
 
 ; The optimization to remove stack operations from PPCDAGToDAGISel::Select

Modified: llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll Tue Oct  4 01:59:23 2016
@@ -7,10 +7,18 @@
 
 define <2 x i64> @test1(i64 %a, i64 %b) {
 entry:
+; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
+; which will happen in a subsequent patch.
 ; CHECK-LABEL: test1
-; CHECK: mtvsrdd 34, 4, 3
+; FIXME: mtvsrdd 34, 4, 3
+; CHECK: mtvsrd {{[0-9]+}}, 3
+; CHECK: mtvsrd {{[0-9]+}}, 4
+; CHECK: xxmrgld
 ; CHECK-BE-LABEL: test1
-; CHECK-BE: mtvsrdd 34, 3, 4
+; FIXME-BE: mtvsrdd 34, 3, 4
+; CHECK-BE: mtvsrd {{[0-9]+}}, 4
+; CHECK-BE: mtvsrd {{[0-9]+}}, 3
+; CHECK-BE: xxmrghd
   %vecins = insertelement <2 x i64> undef, i64 %a, i32 0
   %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
   ret <2 x i64> %vecins1

Modified: llvm/trunk/test/CodeGen/PowerPC/ppc64-i128-abi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-i128-abi.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-i128-abi.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-i128-abi.ll Tue Oct  4 01:59:23 2016
@@ -55,9 +55,12 @@ define <1 x i128> @v1i128_increment_by_o
 ; CHECK-LE: blr
 
 ; CHECK-P9-LABEL: @v1i128_increment_by_one
-; CHECK-P9-DAG: li [[R1:r[0-9]+]], 1
-; CHECK-P9-DAG: li [[R2:r[0-9]+]], 0
-; CHECK-P9: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
+; The below FIXME is due to the lowering for BUILD_VECTOR that will be fixed
+; in a subsequent patch.
+; FIXME: li [[R1:r[0-9]+]], 1
+; FIXME: li [[R2:r[0-9]+]], 0
+; FIXME: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
+; CHECK-P9: lxvx [[V1:v[0-9]+]]
 ; CHECK-P9: vadduqm v2, v2, [[V1]]
 ; CHECK-P9: blr
 

Modified: llvm/trunk/test/CodeGen/PowerPC/select-i1-vs-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/select-i1-vs-i1.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/select-i1-vs-i1.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/select-i1-vs-i1.ll Tue Oct  4 01:59:23 2016
@@ -714,18 +714,12 @@ entry:
   %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
   ret <4 x float> %cond
 
-; FIXME: This test (and the other v4f32 tests) should use the same bclr
-; technique as the v2f64 tests below.
-
 ; CHECK-LABEL: @testv4floatslt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -740,12 +734,9 @@ entry:
 ; CHECK-LABEL: @testv4floatult
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -760,12 +751,9 @@ entry:
 ; CHECK-LABEL: @testv4floatsle
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -780,12 +768,9 @@ entry:
 ; CHECK-LABEL: @testv4floatule
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -800,12 +785,11 @@ entry:
 ; CHECK-LABEL: @testv4floateq
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 35, 35
-; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 34, 34
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB1:[0-9_]+]]
+; CHECK: vor 3, 2, 2
+; CHECK: .LBB[[BB1]]
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -820,12 +804,9 @@ entry:
 ; CHECK-LABEL: @testv4floatsge
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -840,12 +821,9 @@ entry:
 ; CHECK-LABEL: @testv4floatuge
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -860,12 +838,9 @@ entry:
 ; CHECK-LABEL: @testv4floatsgt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -880,12 +855,9 @@ entry:
 ; CHECK-LABEL: @testv4floatugt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -900,12 +872,9 @@ entry:
 ; CHECK-LABEL: @testv4floatne
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
-; CHECK: .LBB[[BB]]:
-; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 
@@ -1023,7 +992,7 @@ entry:
 ; CHECK: bc 12, [[REG1]], .LBB[[BB55:[0-9_]+]]
 ; CHECK: vor 3, 2, 2
 ; CHECK: .LBB[[BB55]]
-; CHECK: xxlor 34, 35, 35
+; CHECK: vor 2, 3, 3
 ; CHECK: blr
 }
 

Modified: llvm/trunk/test/CodeGen/PowerPC/sjlj.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/sjlj.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/sjlj.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/sjlj.ll Tue Oct  4 01:59:23 2016
@@ -66,7 +66,7 @@ return:
 ; CHECK-NOT: mfspr
 
 ; CHECK-DAG: stfd
-; CHECK-DAG: stvx
+; CHECK-DAG: stxvd2x
 
 ; CHECK-DAG: addis [[REG:[0-9]+]], 2, env_sigill at toc@ha
 ; CHECK-DAG: std 31, env_sigill at toc@l([[REG]])
@@ -82,7 +82,7 @@ return:
 ; CHECK: .LBB1_4:
 
 ; CHECK: lfd
-; CHECK: lvx
+; CHECK: lxvd2x
 ; CHECK: ld
 ; CHECK: blr
 
@@ -93,11 +93,11 @@ return:
 ; CHECK: li 3, 0
 
 ; CHECK-NOAV: @main
-; CHECK-NOAV-NOT: stvx
+; CHECK-NOAV-NOT: stxvd2x
 ; CHECK-NOAV: bcl
 ; CHECK-NOAV: mflr
 ; CHECK-NOAV: bl foo
-; CHECK-NOAV-NOT: lvx
+; CHECK-NOAV-NOT: lxvd2x
 ; CHECK-NOAV: blr
 }
 

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-args.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-args.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-args.ll Tue Oct  4 01:59:23 2016
@@ -1,5 +1,6 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | \
+; RUN:   FileCheck -check-prefix=CHECK-FISL %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -13,13 +14,23 @@ entry:
 
 ; CHECK-LABEL: @main
 ; CHECK-DAG: vor [[V:[0-9]+]], 2, 2
-; CHECK-DAG: xxlor 34, 35, 35
-; CHECK-DAG: xxlor 35, 36, 36
+; CHECK-DAG: vor 2, 3, 3
+; CHECK-DAG: vor 3, 4, 4
 ; CHECK-DAG: vor 4, [[V]], [[V]]
-; CHECK-DAG: bl sv
-; CHECK-DAG: lxvd2x [[VC:[0-9]+]],
+; CHECK: bl sv
+; CHECK: lxvd2x [[VC:[0-9]+]],
 ; CHECK: xvadddp 34, 34, [[VC]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @main
+; CHECK-FISL: stxvd2x 34
+; CHECK-FISL: vor 2, 3, 3
+; CHECK-FISL: vor 3, 4, 4
+; CHECK-FISL: lxvd2x 36
+; CHECK-FISL: bl sv
+; CHECK-FISL: lxvd2x [[VC:[0-9]+]],
+; CHECK-FISL: xvadddp 34, 34, [[VC]]
+; CHECK-FISL: blr
 }
 
 attributes #0 = { noinline nounwind readnone }

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx-infl-copy1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-infl-copy1.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-infl-copy1.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-infl-copy1.ll Tue Oct  4 01:59:23 2016
@@ -11,7 +11,15 @@ entry:
   br label %vector.body
 
 ; CHECK-LABEL: @_Z8example9Pj
-; CHECK: xxlor
+; CHECK: vor
+; CHECK: vor
+; CHECK: vor
+; CHECK: vor
+; CHECK: vor
+; CHECK: vor
+; CHECK: vor
+; CHECK: vor
+; CHECK: vor
 
 vector.body:                                      ; preds = %vector.body, %entry
   %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx-p8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-p8.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-p8.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-p8.ll Tue Oct  4 01:59:23 2016
@@ -34,8 +34,7 @@ define <4 x float> @test32u(<4 x float>*
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test32u
-; CHECK-FISL: lxvw4x 0, 0, 3
-; CHECK-FISL: xxlor 34, 0, 0
+; CHECK-FISL: lxvw4x 34, 0, 3
 ; CHECK-FISL: blr
 }
 
@@ -48,8 +47,7 @@ define void @test33u(<4 x float>* %a, <4
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test33u
-; CHECK-FISL: vor 3, 2, 2
-; CHECK-FISL: stxvw4x 35, 0, 3
+; CHECK-FISL: stxvw4x 34, 0, 3
 ; CHECK-FISL: blr
 }
 

Added: llvm/trunk/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll?rev=283190&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll Tue Oct  4 01:59:23 2016
@@ -0,0 +1,1132 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-BE
+; Function Attrs: norecurse nounwind readonly
+define <16 x i8> @vecucuc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK-LABEL: vecucuc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vspltb 2, 2, 7
+; CHECK-BE-LABEL: vecucuc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <8 x i16> @vecusuc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = zext i8 %0 to i16
+  %splat.splatinsert = insertelement <8 x i16> undef, i16 %conv, i32 0
+  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %splat.splat
+; CHECK-LABEL: vecusuc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vsplth 2, 2, 3
+; CHECK-BE-LABEL: vecusuc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vsplth 2, 2, 3
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @vecuiuc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = zext i8 %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK-LABEL: vecuiuc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: xxspltw 34, 34, 1
+; CHECK-BE-LABEL: vecuiuc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: xxspltw 34, 34, 1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @veculuc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = zext i8 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK-LABEL: veculuc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: xxspltd 34, 34, 0
+; CHECK-BE-LABEL: veculuc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: xxspltd 34, 34, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <16 x i8> @vecscuc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK-LABEL: vecscuc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vspltb 2, 2, 7
+; CHECK-BE-LABEL: vecscuc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <8 x i16> @vecssuc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = zext i8 %0 to i16
+  %splat.splatinsert = insertelement <8 x i16> undef, i16 %conv, i32 0
+  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %splat.splat
+; CHECK-LABEL: vecssuc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vsplth 2, 2, 3
+; CHECK-BE-LABEL: vecssuc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vsplth 2, 2, 3
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @vecsiuc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = zext i8 %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK-LABEL: vecsiuc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: xxspltw 34, 34, 1
+; CHECK-BE-LABEL: vecsiuc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: xxspltw 34, 34, 1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @vecsluc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = zext i8 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK-LABEL: vecsluc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: xxspltd 34, 34, 0
+; CHECK-BE-LABEL: vecsluc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: xxspltd 34, 34, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x float> @vecfuc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = uitofp i8 %0 to float
+  %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
+  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %splat.splat
+; CHECK-LABEL: vecfuc
+; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3
+; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
+; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
+; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-BE-LABEL: vecfuc
+; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
+; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
+; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
+; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @vecduc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = uitofp i8 %0 to double
+  %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0
+  %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %splat.splat
+; CHECK-LABEL: vecduc
+; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3
+; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
+; CHECK-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-BE-LABEL: vecduc
+; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
+; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
+; CHECK-BE-NEXT: xxspltd  34, [[CONVD]], 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <16 x i8> @vecucsc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK-LABEL: vecucsc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vspltb 2, 2, 7
+; CHECK-BE-LABEL: vecucsc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @vecuisc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = sext i8 %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK-LABEL: vecuisc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vextsb2w 2, 2
+; CHECK-NEXT: xxspltw 34, 34, 1
+; CHECK-BE-LABEL: vecuisc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vextsb2w 2, 2
+; CHECK-BE-NEXT: xxspltw 34, 34, 1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @veculsc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = sext i8 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK-LABEL: veculsc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vextsb2d 2, 2
+; CHECK-NEXT: xxspltd 34, 34, 0
+; CHECK-BE-LABEL: veculsc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: xxspltd 34, 34, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <16 x i8> @vecscsc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK-LABEL: vecscsc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vspltb 2, 2, 7
+; CHECK-BE-LABEL: vecscsc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @vecsisc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = sext i8 %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK-LABEL: vecsisc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vextsb2w 2, 2
+; CHECK-NEXT: xxspltw 34, 34, 1
+; CHECK-BE-LABEL: vecsisc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vextsb2w 2, 2
+; CHECK-BE-NEXT: xxspltw 34, 34, 1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @vecslsc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = sext i8 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK-LABEL: vecslsc
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vextsb2d 2, 2
+; CHECK-NEXT: xxspltd 34, 34, 0
+; CHECK-BE-LABEL: vecslsc
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: xxspltd 34, 34, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x float> @vecfsc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = sitofp i8 %0 to float
+  %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
+  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %splat.splat
+; CHECK-LABEL: vecfsc
+; CHECK: lxsibzx
+; CHECK-NEXT: vextsb2d
+; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
+; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
+; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-BE-LABEL: vecfsc
+; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
+; CHECK-BE-NEXT: vextsb2d
+; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
+; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
+; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @vecdsc(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = sitofp i8 %0 to double
+  %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0
+  %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %splat.splat
+; CHECK-LABEL: vecdsc
+; CHECK: lxsibzx
+; CHECK-NEXT: vextsb2d
+; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]],
+; CHECK-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-BE-LABEL: vecdsc
+; CHECK-BE: lxsibzx
+; CHECK-BE-NEXT: vextsb2d
+; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]],
+; CHECK-BE-NEXT: xxspltd  34, [[CONVD]], 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <16 x i8> @vecucus(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = trunc i16 %0 to i8
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK-LABEL: vecucus
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vspltb 2, 2, 7
+; CHECK-BE-LABEL: vecucus
+; CHECK-BE: li [[OFFSET:[0-9]+]], 1
+; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]]
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <8 x i16> @vecusus(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0
+  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %splat.splat
+; CHECK-LABEL: vecusus
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: vsplth 2, 2, 3
+; CHECK-BE-LABEL: vecusus
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: vsplth 2, 2, 3
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @vecuius(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = zext i16 %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK-LABEL: vecuius
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: xxspltw 34, 34, 1
+; CHECK-BE-LABEL: vecuius
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: xxspltw 34, 34, 1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @veculus(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = zext i16 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK-LABEL: veculus
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: xxspltd 34, 34, 0
+; CHECK-BE-LABEL: veculus
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: xxspltd 34, 34, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <16 x i8> @vecscus(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = trunc i16 %0 to i8
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK-LABEL: vecscus
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vspltb 2, 2, 7
+; CHECK-BE-LABEL: vecscus
+; CHECK-BE: li [[OFFSET:[0-9]+]], 1
+; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]]
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <8 x i16> @vecssus(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0
+  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %splat.splat
+; CHECK-LABEL: vecssus
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: vsplth 2, 2, 3
+; CHECK-BE-LABEL: vecssus
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: vsplth 2, 2, 3
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @vecsius(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = zext i16 %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK-LABEL: vecsius
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: xxspltw 34, 34, 1
+; CHECK-BE-LABEL: vecsius
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: xxspltw 34, 34, 1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @vecslus(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = zext i16 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK-LABEL: vecslus
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: xxspltd 34, 34, 0
+; CHECK-BE-LABEL: vecslus
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: xxspltd 34, 34, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x float> @vecfus(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = uitofp i16 %0 to float
+  %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
+  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %splat.splat
+; CHECK-LABEL: vecfus
+; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3
+; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
+; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
+; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-BE-LABEL: vecfus
+; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
+; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
+; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
+; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @vecdus(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = uitofp i16 %0 to double
+  %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0
+  %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %splat.splat
+; CHECK-LABEL: vecdus
+; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3
+; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
+; CHECK-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-BE-LABEL: vecdus
+; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
+; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
+; CHECK-BE-NEXT: xxspltd  34, [[CONVD]], 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <16 x i8> @vecucss(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = trunc i16 %0 to i8
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK-LABEL: vecucss
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vspltb 2, 2, 7
+; CHECK-BE-LABEL: vecucss
+; CHECK-BE: li [[OFFSET:[0-9]+]], 1
+; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]]
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @vecuiss(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = sext i16 %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK-LABEL: vecuiss
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: vextsh2w 2, 2
+; CHECK-NEXT: xxspltw 34, 34, 1
+; CHECK-BE-LABEL: vecuiss
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: vextsh2w 2, 2
+; CHECK-BE-NEXT: xxspltw 34, 34, 1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @veculss(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = sext i16 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK-LABEL: veculss
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: vextsh2d 2, 2
+; CHECK-NEXT: xxspltd 34, 34, 0
+; CHECK-BE-LABEL: veculss
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: xxspltd 34, 34, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <16 x i8> @vecscss(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = trunc i16 %0 to i8
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK-LABEL: vecscss
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vspltb 2, 2, 7
+; CHECK-BE-LABEL: vecscss
+; CHECK-BE: li [[OFFSET:[0-9]+]], 1
+; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]]
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @vecsiss(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = sext i16 %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK-LABEL: vecsiss
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: vextsh2w 2, 2
+; CHECK-NEXT: xxspltw 34, 34, 1
+; CHECK-BE-LABEL: vecsiss
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: vextsh2w 2, 2
+; CHECK-BE-NEXT: xxspltw 34, 34, 1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @vecslss(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = sext i16 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK-LABEL: vecslss
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: vextsh2d 2, 2
+; CHECK-NEXT: xxspltd 34, 34, 0
+; CHECK-BE-LABEL: vecslss
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: xxspltd 34, 34, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x float> @vecfss(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = sitofp i16 %0 to float
+  %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
+  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %splat.splat
+; CHECK-LABEL: vecfss
+; CHECK: lxsihzx
+; CHECK-NEXT: vextsh2d
+; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
+; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
+; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-BE-LABEL: vecfss
+; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
+; CHECK-BE-NEXT: vextsh2d
+; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
+; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
+; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @vecdss(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = sitofp i16 %0 to double
+  %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0
+  %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %splat.splat
+; CHECK-LABEL: vecdss
+; CHECK: lxsihzx
+; CHECK-NEXT: vextsh2d
+; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]],
+; CHECK-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-BE-LABEL: vecdss
+; CHECK-BE: lxsihzx
+; CHECK-BE-NEXT: vextsh2d
+; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]],
+; CHECK-BE-NEXT: xxspltd  34, [[CONVD]], 0
+}
+
+; Function Attrs: norecurse nounwind
+define void @storefsc(float %f, i8* nocapture %ptr) {
+entry:
+  %conv = fptosi float %f to i8
+  store i8 %conv, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storefsc
+; CHECK: xscvdpsxws 0, 1
+; CHECK: stxsibx 0, 0, 4
+; CHECK-BE-LABEL: storefsc
+; CHECK-BE: xscvdpsxws 0, 1
+; CHECK-BE: stxsibx 0, 0, 4
+}
+
+; Function Attrs: norecurse nounwind
+define void @storedsc(double %d, i8* nocapture %ptr) {
+entry:
+  %conv = fptosi double %d to i8
+  store i8 %conv, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storedsc
+; CHECK: xscvdpsxws 0, 1
+; CHECK: stxsibx 0, 0, 4
+; CHECK-BE-LABEL: storedsc
+; CHECK-BE: xscvdpsxws 0, 1
+; CHECK-BE: stxsibx 0, 0, 4
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc0(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 0
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc0
+; CHECK: vsldoi 2, 2, 2, 8
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc0
+; CHECK-BE: vsldoi 2, 2, 2, 9
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc1(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 1
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc1
+; CHECK: vsldoi 2, 2, 2, 7
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc1
+; CHECK-BE: vsldoi 2, 2, 2, 10
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc2(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 2
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc2
+; CHECK: vsldoi 2, 2, 2, 6
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc2
+; CHECK-BE: vsldoi 2, 2, 2, 11
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc3(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 3
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc3
+; CHECK: vsldoi 2, 2, 2, 5
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc3
+; CHECK-BE: vsldoi 2, 2, 2, 12
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc4(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 4
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc4
+; CHECK: vsldoi 2, 2, 2, 4
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc4
+; CHECK-BE: vsldoi 2, 2, 2, 13
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc5(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 5
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc5
+; CHECK: vsldoi 2, 2, 2, 3
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc5
+; CHECK-BE: vsldoi 2, 2, 2, 14
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc6(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 6
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc6
+; CHECK: vsldoi 2, 2, 2, 2
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc6
+; CHECK-BE: vsldoi 2, 2, 2, 15
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc7(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 7
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc7
+; CHECK: vsldoi 2, 2, 2, 1
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc7
+; CHECK-BE: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc8(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 8
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc8
+; CHECK: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc8
+; CHECK-BE: vsldoi 2, 2, 2, 1
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc9(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 9
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc9
+; CHECK: vsldoi 2, 2, 2, 15
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc9
+; CHECK-BE: vsldoi 2, 2, 2, 2
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc10(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 10
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc10
+; CHECK: vsldoi 2, 2, 2, 14
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc10
+; CHECK-BE: vsldoi 2, 2, 2, 3
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc11(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 11
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc11
+; CHECK: vsldoi 2, 2, 2, 13
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc11
+; CHECK-BE: vsldoi 2, 2, 2, 4
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc12(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 12
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc12
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc12
+; CHECK-BE: vsldoi 2, 2, 2, 5
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc13(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 13
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc13
+; CHECK: vsldoi 2, 2, 2, 11
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc13
+; CHECK-BE: vsldoi 2, 2, 2, 6
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc14(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 14
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc14
+; CHECK: vsldoi 2, 2, 2, 10
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc14
+; CHECK-BE: vsldoi 2, 2, 2, 7
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevcsc15(<16 x i8> %v, i8* nocapture %ptr) {
+entry:
+  %vecext = extractelement <16 x i8> %v, i32 15
+  store i8 %vecext, i8* %ptr, align 1
+  ret void
+; CHECK-LABEL: storevcsc15
+; CHECK: vsldoi 2, 2, 2, 9
+; CHECK-NEXT: stxsibx 34, 0, 5
+; CHECK-BE-LABEL: storevcsc15
+; CHECK-BE: vsldoi 2, 2, 2, 8
+; CHECK-BE-NEXT: stxsibx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storefss(float %f, i16* nocapture %ptr) {
+entry:
+  %conv = fptosi float %f to i16
+  store i16 %conv, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storefss
+; CHECK: xscvdpsxws 0, 1
+; CHECK: stxsihx 0, 0, 4
+; CHECK-BE-LABEL: storefss
+; CHECK-BE: xscvdpsxws 0, 1
+; CHECK-BE: stxsihx 0, 0, 4
+}
+
+; Function Attrs: norecurse nounwind
+define void @storedss(double %d, i16* nocapture %ptr) {
+entry:
+  %conv = fptosi double %d to i16
+  store i16 %conv, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storedss
+; CHECK: xscvdpsxws 0, 1
+; CHECK: stxsihx 0, 0, 4
+; CHECK-BE-LABEL: storedss
+; CHECK-BE: xscvdpsxws 0, 1
+; CHECK-BE: stxsihx 0, 0, 4
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevsss0(<8 x i16> %v, i16* nocapture %ptr) {
+entry:
+  %vecext = extractelement <8 x i16> %v, i32 0
+  store i16 %vecext, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storevsss0
+; CHECK: vsldoi 2, 2, 2, 8
+; CHECK-NEXT: stxsihx 34, 0, 5
+; CHECK-BE-LABEL: storevsss0
+; CHECK-BE: vsldoi 2, 2, 2, 10
+; CHECK-BE-NEXT: stxsihx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevsss1(<8 x i16> %v, i16* nocapture %ptr) {
+entry:
+  %vecext = extractelement <8 x i16> %v, i32 1
+  store i16 %vecext, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storevsss1
+; CHECK: vsldoi 2, 2, 2, 6
+; CHECK-NEXT: stxsihx 34, 0, 5
+; CHECK-BE-LABEL: storevsss1
+; CHECK-BE: vsldoi 2, 2, 2, 12
+; CHECK-BE-NEXT: stxsihx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevsss2(<8 x i16> %v, i16* nocapture %ptr) {
+entry:
+  %vecext = extractelement <8 x i16> %v, i32 2
+  store i16 %vecext, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storevsss2
+; CHECK: vsldoi 2, 2, 2, 4
+; CHECK-NEXT: stxsihx 34, 0, 5
+; CHECK-BE-LABEL: storevsss2
+; CHECK-BE: vsldoi 2, 2, 2, 14
+; CHECK-BE-NEXT: stxsihx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevsss3(<8 x i16> %v, i16* nocapture %ptr) {
+entry:
+  %vecext = extractelement <8 x i16> %v, i32 3
+  store i16 %vecext, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storevsss3
+; CHECK: vsldoi 2, 2, 2, 2
+; CHECK-NEXT: stxsihx 34, 0, 5
+; CHECK-BE-LABEL: storevsss3
+; CHECK-BE: stxsihx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevsss4(<8 x i16> %v, i16* nocapture %ptr) {
+entry:
+  %vecext = extractelement <8 x i16> %v, i32 4
+  store i16 %vecext, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storevsss4
+; CHECK: stxsihx 34, 0, 5
+; CHECK-BE-LABEL: storevsss4
+; CHECK-BE: vsldoi 2, 2, 2, 2
+; CHECK-BE-NEXT: stxsihx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevsss5(<8 x i16> %v, i16* nocapture %ptr) {
+entry:
+  %vecext = extractelement <8 x i16> %v, i32 5
+  store i16 %vecext, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storevsss5
+; CHECK: vsldoi 2, 2, 2, 14
+; CHECK-NEXT: stxsihx 34, 0, 5
+; CHECK-BE-LABEL: storevsss5
+; CHECK-BE: vsldoi 2, 2, 2, 4
+; CHECK-BE-NEXT: stxsihx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevsss6(<8 x i16> %v, i16* nocapture %ptr) {
+entry:
+  %vecext = extractelement <8 x i16> %v, i32 6
+  store i16 %vecext, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storevsss6
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK-NEXT: stxsihx 34, 0, 5
+; CHECK-BE-LABEL: storevsss6
+; CHECK-BE: vsldoi 2, 2, 2, 6
+; CHECK-BE-NEXT: stxsihx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind
+define void @storevsss7(<8 x i16> %v, i16* nocapture %ptr) {
+entry:
+  %vecext = extractelement <8 x i16> %v, i32 7
+  store i16 %vecext, i16* %ptr, align 2
+  ret void
+; CHECK-LABEL: storevsss7
+; CHECK: vsldoi 2, 2, 2, 10
+; CHECK-NEXT: stxsihx 34, 0, 5
+; CHECK-BE-LABEL: storevsss7
+; CHECK-BE: vsldoi 2, 2, 2, 8
+; CHECK-BE-NEXT: stxsihx 34, 0, 5
+}
+
+; Function Attrs: norecurse nounwind readonly
+define float @convscf(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = sitofp i8 %0 to float
+  ret float %conv
+; CHECK-LABEL: convscf
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vextsb2d 2, 2
+; CHECK-NEXT: xscvsxdsp 1, 34
+; CHECK-BE-LABEL: convscf
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: xscvsxdsp 1, 34
+}
+
+; Function Attrs: norecurse nounwind readonly
+define float @convucf(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = uitofp i8 %0 to float
+  ret float %conv
+; CHECK-LABEL: convucf
+; CHECK: lxsibzx 0, 0, 3
+; CHECK-NEXT: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: convucf
+; CHECK-BE: lxsibzx 0, 0, 3
+; CHECK-BE-NEXT: xscvuxdsp 1, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define double @convscd(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = sitofp i8 %0 to double
+; CHECK-LABEL: convscd
+; CHECK: lxsibzx 34, 0, 3
+; CHECK-NEXT: vextsb2d 2, 2
+; CHECK-NEXT: xscvsxddp 1, 34
+; CHECK-BE-LABEL: convscd
+; CHECK-BE: lxsibzx 34, 0, 3
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: xscvsxddp 1, 34
+  ret double %conv
+}
+
+; Function Attrs: norecurse nounwind readonly
+define double @convucd(i8* nocapture readonly %ptr) {
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %conv = uitofp i8 %0 to double
+  ret double %conv
+; CHECK-LABEL: convucd
+; CHECK: lxsibzx 0, 0, 3
+; CHECK-NEXT: xscvuxddp 1, 0
+; CHECK-BE-LABEL: convucd
+; CHECK-BE: lxsibzx 0, 0, 3
+; CHECK-BE-NEXT: xscvuxddp 1, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define float @convssf(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = sitofp i16 %0 to float
+  ret float %conv
+; CHECK-LABEL: convssf
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: vextsh2d 2, 2
+; CHECK-NEXT: xscvsxdsp 1, 34
+; CHECK-BE-LABEL: convssf
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: xscvsxdsp 1, 34
+}
+
+; Function Attrs: norecurse nounwind readonly
+define float @convusf(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = uitofp i16 %0 to float
+  ret float %conv
+; CHECK-LABEL: convusf
+; CHECK: lxsihzx 0, 0, 3
+; CHECK-NEXT: xscvuxdsp 1, 0
+; CHECK-BE-LABEL: convusf
+; CHECK-BE: lxsihzx 0, 0, 3
+; CHECK-BE-NEXT: xscvuxdsp 1, 0
+}
+
+; Function Attrs: norecurse nounwind readonly
+define double @convssd(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = sitofp i16 %0 to double
+  ret double %conv
+; CHECK-LABEL: convssd
+; CHECK: lxsihzx 34, 0, 3
+; CHECK-NEXT: vextsh2d 2, 2
+; CHECK-NEXT: xscvsxddp 1, 34
+; CHECK-BE-LABEL: convssd
+; CHECK-BE: lxsihzx 34, 0, 3
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: xscvsxddp 1, 34
+}
+
+; Function Attrs: norecurse nounwind readonly
+define double @convusd(i16* nocapture readonly %ptr) {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %conv = uitofp i16 %0 to double
+  ret double %conv
+; CHECK-LABEL: convusd
+; CHECK: lxsihzx 0, 0, 3
+; CHECK-NEXT: xscvuxddp 1, 0
+; CHECK-BE-LABEL: convusd
+; CHECK-BE: lxsihzx 0, 0, 3
+; CHECK-BE-NEXT: xscvuxddp 1, 0
+}

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx-spill-norwstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-spill-norwstore.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-spill-norwstore.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-spill-norwstore.ll Tue Oct  4 01:59:23 2016
@@ -4,6 +4,7 @@ target triple = "powerpc64-unknown-linux
 
 @.str1 = external unnamed_addr constant [5 x i8], align 1
 @.str10 = external unnamed_addr constant [9 x i8], align 1
+@.v2f64 = external unnamed_addr constant <2 x double>, align 16
 
 ; Function Attrs: nounwind
 define void @main() #0 {
@@ -12,6 +13,7 @@ define void @main() #0 {
 ; CHECK: stxvd2x
 
 entry:
+  %val = load <2 x double>, <2 x double>* @.v2f64, align 16
   %0 = tail call <8 x i16> @llvm.ppc.altivec.vupkhsb(<16 x i8> <i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1>) #0
   %1 = tail call <8 x i16> @llvm.ppc.altivec.vupklsb(<16 x i8> <i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1>) #0
   br i1 false, label %if.then.i68.i, label %check.exit69.i
@@ -23,7 +25,7 @@ check.exit69.i:
   br i1 undef, label %if.then.i63.i, label %check.exit64.i
 
 if.then.i63.i:                                    ; preds = %check.exit69.i
-  tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0)) #0
+  tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0), <2 x double> %val) #0
   br label %check.exit64.i
 
 check.exit64.i:                                   ; preds = %if.then.i63.i, %check.exit69.i

Added: llvm/trunk/test/CodeGen/PowerPC/vsx-vec-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-vec-spill.ll?rev=283190&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-vec-spill.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-vec-spill.ll Tue Oct  4 01:59:23 2016
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=ppc64 -mattr=+vsx -verify-machineinstrs | \
+; RUN:   FileCheck %s --check-prefix=VSX
+; RUN: llc < %s -march=ppc64 -mattr=-vsx -verify-machineinstrs | \
+; RUN:   FileCheck %s --check-prefix=NOVSX
+
+define <2 x double> @interleaving_VSX_VMX(
+  <2 x double> %a, <2 x double> %b, <2 x double> %c,
+  <2 x double> %d, <2 x double> %e, <2 x double> %f) {
+entry:
+  tail call void asm sideeffect "# clobbers",
+    "~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind
+  tail call void @goo(<2 x double> %a) nounwind
+  %add = fadd <2 x double> %a, %b
+  %sub = fsub <2 x double> %a, %b
+  %mul = fmul <2 x double> %add, %sub
+  %add1 = fadd <2 x double> %c, %d
+  %sub2 = fsub <2 x double> %c, %d
+  %mul3 = fmul <2 x double> %add1, %sub2
+  %add4 = fadd <2 x double> %mul, %mul3
+  %add5 = fadd <2 x double> %e, %f
+  %sub6 = fsub <2 x double> %e, %f
+  %mul7 = fmul <2 x double> %add5, %sub6
+  %add8 = fadd <2 x double> %add4, %mul7
+  ret <2 x double> %add8
+; VSX-LABEL: interleaving_VSX_VMX
+; VSX-NOT: stvx
+; VSX-NOT: lvx
+
+; NOVSX-LABEL: interleaving_VSX_VMX
+; NOVSX-NOT: stxvd2x
+; NOVSX-NOT: lxvd2x
+}
+
+declare void @goo(<2 x double>)

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx.ll?rev=283190&r1=283189&r2=283190&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx.ll Tue Oct  4 01:59:23 2016
@@ -70,10 +70,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test5
-; CHECK-FISL: vor
-; CHECK-FISL: vor
-; CHECK-FISL: xxlxor
-; CHECK-FISL: vor 2
+; CHECK-FISL: xxlxor 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test5
@@ -91,10 +88,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test6
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlxor 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlxor 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test6
@@ -112,10 +106,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test7
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlxor 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlxor 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test7
@@ -133,10 +124,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test8
-; CHECK-FISL: vor
-; CHECK-FISL: vor
-; CHECK-FISL: xxlor
-; CHECK-FISL: vor 2
+; CHECK-FISL: xxlor 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test8
@@ -154,10 +142,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test9
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlor 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlor 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test9
@@ -175,10 +160,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test10
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlor 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlor 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test10
@@ -196,10 +178,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test11
-; CHECK-FISL: vor
-; CHECK-FISL: vor
-; CHECK-FISL: xxland
-; CHECK-FISL: vor 2
+; CHECK-FISL: xxland 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test11
@@ -217,10 +196,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test12
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxland 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxland 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test12
@@ -238,10 +214,7 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test13
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxland 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxland 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test13
@@ -260,11 +233,8 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test14
-; CHECK-FISL: vor 4, 3, 3
-; CHECK-FISL: vor 5, 2, 2
-; CHECK-FISL: xxlor 0, 37, 36
-; CHECK-FISL: xxlnor 36, 37, 36
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlor 0, 34, 35
+; CHECK-FISL: xxlnor 34, 34, 35
 ; CHECK-FISL: lis 0, -1
 ; CHECK-FISL: ori 0, 0, 65520
 ; CHECK-FISL: stxvd2x 0, 1, 0
@@ -286,17 +256,13 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test15
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlor 36, 36, 37
-; CHECK-FISL: vor 0, 4, 4
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlnor 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlor 0, 34, 35
+; CHECK-FISL: xxlor 36, 0, 0
+; CHECK-FISL: xxlnor 0, 34, 35
+; CHECK-FISL: xxlor 34, 0, 0
 ; CHECK-FISL: lis 0, -1
 ; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stvx 0, 1, 0
+; CHECK-FISL: stxvd2x 36, 1, 0
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test15
@@ -315,17 +281,13 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test16
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlor 36, 36, 37
-; CHECK-FISL: vor 0, 4, 4
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlnor 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlor 0, 34, 35
+; CHECK-FISL: xxlor 36, 0, 0
+; CHECK-FISL: xxlnor 0, 34, 35
+; CHECK-FISL: xxlor 34, 0, 0
 ; CHECK-FISL: lis 0, -1
 ; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stvx 0, 1, 0
+; CHECK-FISL: stxvd2x 36, 1, 0
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test16
@@ -344,11 +306,8 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test17
-; CHECK-FISL: vor 4, 3, 3
-; CHECK-FISL: vor 5, 2, 2
-; CHECK-FISL: xxlnor 36, 36, 36
-; CHECK-FISL: xxland 36, 37, 36
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlnor 35, 35, 35
+; CHECK-FISL: xxland 34, 34, 35
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test17
@@ -367,17 +326,13 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test18
-; CHECK-FISL: vor 4, 3, 3
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlnor 36, 36, 37
-; CHECK-FISL: vor 0, 4, 4
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlandc 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlnor 0, 35, 35
+; CHECK-FISL: xxlor 36, 0, 0
+; CHECK-FISL: xxlandc 0, 34, 35
+; CHECK-FISL: xxlor 34, 0, 0
 ; CHECK-FISL: lis 0, -1
 ; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stvx 0, 1, 0
+; CHECK-FISL: stxvd2x 36, 1, 0
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test18
@@ -396,17 +351,13 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test19
-; CHECK-FISL: vor 4, 3, 3
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlnor 36, 36, 37
-; CHECK-FISL: vor 0, 4, 4
-; CHECK-FISL: vor 4, 2, 2
-; CHECK-FISL: vor 5, 3, 3
-; CHECK-FISL: xxlandc 36, 36, 37
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: xxlnor 0, 35, 35
+; CHECK-FISL: xxlor 36, 0, 0
+; CHECK-FISL: xxlandc 0, 34, 35
+; CHECK-FISL: xxlor 34, 0, 0
 ; CHECK-FISL: lis 0, -1
 ; CHECK-FISL: ori 0, 0, 65520
-; CHECK-FISL: stvx 0, 1, 0
+; CHECK-FISL: stxvd2x 36, 1, 0
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test19
@@ -425,19 +376,9 @@ entry:
 ; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}}
 ; CHECK-REG: blr
 
-; FIXME: The fast-isel code is pretty miserable for this one.
-
 ; CHECK-FISL-LABEL: @test20
-; CHECK-FISL: vor 0, 5, 5
-; CHECK-FISL: vor 1, 4, 4
-; CHECK-FISL: vor 6, 3, 3
-; CHECK-FISL: vor 7, 2, 2
-; CHECK-FISL: vor 2, 1, 1
-; CHECK-FISL: vor 3, 0, 0
-; CHECK-FISL: vcmpequw 2, 2, 3
-; CHECK-FISL: vor 0, 2, 2
-; CHECK-FISL: xxsel 32, 38, 39, 32
-; CHECK-FISL: vor 2, 0, 0
+; CHECK-FISL: vcmpequw {{[0-9]+}}, 4, 5
+; CHECK-FISL: xxsel 34, 35, 34, {{[0-9]+}}
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test20
@@ -458,13 +399,8 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test21
-; CHECK-FISL: vor 0, 5, 5
-; CHECK-FISL: vor 1, 4, 4
-; CHECK-FISL: vor 6, 3, 3
-; CHECK-FISL: vor 7, 2, 2
-; CHECK-FISL: xvcmpeqsp 32, 33, 32
-; CHECK-FISL: xxsel 32, 38, 39, 32
-; CHECK-FISL: vor 2, 0, 0
+; CHECK-FISL: xvcmpeqsp [[V1:[0-9]+]], 36, 37
+; CHECK-FISL: xxsel 34, 35, 34, [[V1]]
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test21
@@ -491,14 +427,14 @@ entry:
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test22
-; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 32
-; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 32, 32
-; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 33
+; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37
+; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36
+; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37
 ; CHECK-FISL-DAG: xxlnor
 ; CHECK-FISL-DAG: xxlnor
 ; CHECK-FISL-DAG: xxlor
 ; CHECK-FISL-DAG: xxlor
-; CHECK-FISL: xxsel 0, 38, 39, {{[0-9]+}}
+; CHECK-FISL: xxsel 34, 35, 34, {{[0-9]+}}
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test22
@@ -526,11 +462,7 @@ entry:
 
 ; CHECK-FISL-LABEL: @test23
 ; CHECK-FISL: vcmpequh 4, 4, 5
-; CHECK-FISL: vor 0, 3, 3
-; CHECK-FISL: vor 1, 2, 2
-; CHECK-FISL: vor 6, 4, 4
-; CHECK-FISL: xxsel 32, 32, 33, 38
-; CHECK-FISL: vor 2, 0, 0
+; CHECK-FISL: xxsel 34, 35, 34, 36
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test23
@@ -552,11 +484,7 @@ entry:
 
 ; CHECK-FISL-LABEL: @test24
 ; CHECK-FISL: vcmpequb 4, 4, 5
-; CHECK-FISL: vor 0, 3, 3
-; CHECK-FISL: vor 1, 2, 2
-; CHECK-FISL: vor 6, 4, 4
-; CHECK-FISL: xxsel 32, 32, 33, 38
-; CHECK-FISL: vor 2, 0, 0
+; CHECK-FISL: xxsel 34, 35, 34, 36
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test24
@@ -682,8 +610,6 @@ define <2 x i64> @test30(<2 x i64>* %a)
 ; CHECK-FISL-LABEL: @test30
 ; CHECK-FISL: lxvd2x 0, 0, 3
 ; CHECK-FISL: xxlor 34, 0, 0
-; CHECK-FISL: vor 3, 2, 2
-; CHECK-FISL: vor 2, 3, 3
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test30
@@ -715,8 +641,7 @@ define <4 x float> @test32(<4 x float>*
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test32
-; CHECK-FISL: lxvw4x 0, 0, 3
-; CHECK-FISL: xxlor 34, 0, 0
+; CHECK-FISL: lxvw4x 34, 0, 3
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test32
@@ -734,8 +659,7 @@ define void @test33(<4 x float>* %a, <4
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test33
-; CHECK-FISL: vor 3, 2, 2
-; CHECK-FISL: stxvw4x 35, 0, 3
+; CHECK-FISL: stxvw4x 34, 0, 3
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test33
@@ -770,8 +694,7 @@ define void @test33u(<4 x float>* %a, <4
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test33u
-; CHECK-FISL: vor 3, 2, 2
-; CHECK-FISL: stxvw4x 35, 0, 3
+; CHECK-FISL: stxvw4x 34, 0, 3
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test33u
@@ -789,8 +712,7 @@ define <4 x i32> @test34(<4 x i32>* %a)
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test34
-; CHECK-FISL: lxvw4x 0, 0, 3
-; CHECK-FISL: xxlor 34, 0, 0
+; CHECK-FISL: lxvw4x 34, 0, 3
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test34
@@ -808,8 +730,7 @@ define void @test35(<4 x i32>* %a, <4 x
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test35
-; CHECK-FISL: vor 3, 2, 2
-; CHECK-FISL: stxvw4x 35, 0, 3
+; CHECK-FISL: stxvw4x 34, 0, 3
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test35
@@ -1086,10 +1007,7 @@ define <2 x i1> @test65(<2 x i64> %a, <2
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test65
-; CHECK-FISL: vor 4, 3, 3
-; CHECK-FISL: vor 5, 2, 2
-; CHECK-FISL: vcmpequw 4, 5, 4
-; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: vcmpequw 2, 2, 3
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test65
@@ -1107,8 +1025,8 @@ define <2 x i1> @test66(<2 x i64> %a, <2
 ; CHECK-REG: blr
 
 ; CHECK-FISL-LABEL: @test66
-; CHECK-FISL: vcmpequw {{[0-9]+}}, 5, 4
-; CHECK-FISL: xxlnor 34, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-FISL: vcmpequw 2, 2, 3
+; CHECK-FISL: xxlnor 34, 34, 34
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test66




More information about the llvm-commits mailing list