[llvm] r339260 - [PowerPC] Improve codegen for vector loads using scalar_to_vector

Zaara Syeda via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 8 08:20:44 PDT 2018


Author: syzaara
Date: Wed Aug  8 08:20:43 2018
New Revision: 339260

URL: http://llvm.org/viewvc/llvm-project?rev=339260&view=rev
Log:
[PowerPC] Improve codegen for vector loads using scalar_to_vector

This patch aims to improve the codegen for vector loads involving the
scalar_to_vector (load X) sequence. Initially, ld->mv instructions were used
for scalar_to_vector (load X), so this patch allows scalar_to_vector (load X)
to utilize:

LXSD and LXSDX for i64 and f64
LXSIWAX for i32 (sign extension to i64)
LXSIWZX for i32 and f32

Committing on behalf of Amy Kwan.
Differential Revision: https://reviews.llvm.org/D48950

Added:
    llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll
    llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll
    llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_3.ll
    llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/P9InstrResources.td
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
    llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
    llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll
    llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll
    llvm/trunk/test/CodeGen/PowerPC/qpx-load-splat.ll
    llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll

Modified: llvm/trunk/lib/Target/PowerPC/P9InstrResources.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/P9InstrResources.td?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/P9InstrResources.td (original)
+++ llvm/trunk/lib/Target/PowerPC/P9InstrResources.td Wed Aug  8 08:20:43 2018
@@ -592,6 +592,7 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_
     XXPERM,
     XXPERMR,
     XXSLDWI,
+    XXSLDWIs,
     XXSPLTIB,
     XXSPLTW,
     XXSPLTWs,

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Wed Aug  8 08:20:43 2018
@@ -8454,17 +8454,6 @@ SDValue PPCTargetLowering::LowerVECTOR_S
     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
       int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
 
-      // If the source for the shuffle is a scalar_to_vector that came from a
-      // 32-bit load, it will have used LXVWSX so we don't need to splat again.
-      if (Subtarget.hasP9Vector() &&
-          ((isLittleEndian && SplatIdx == 3) ||
-           (!isLittleEndian && SplatIdx == 0))) {
-        SDValue Src = V1.getOperand(0);
-        if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-            Src.getOperand(0).getOpcode() == ISD::LOAD &&
-            Src.getOperand(0).hasOneUse())
-          return V1;
-      }
       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                   DAG.getConstant(SplatIdx, dl, MVT::i32));

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Wed Aug  8 08:20:43 2018
@@ -877,6 +877,12 @@ let Uses = [RM] in {
                        "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,
                        [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
                                                   imm32SExt16:$SHW))]>;
+
+  let isCodeGenOnly = 1 in
+  def XXSLDWIs : XX3Form_2s<60, 2,
+                       (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW),
+                       "xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>;
+
   def XXSPLTW : XX2Form_2<60, 164,
                        (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
                        "xxspltw $XT, $XB, $UIM", IIC_VecPerm,
@@ -886,6 +892,7 @@ let Uses = [RM] in {
   def XXSPLTWs : XX2Form_2<60, 164,
                        (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
                        "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+
 } // hasSideEffects
 } // UseVSXReg = 1
 
@@ -1466,8 +1473,6 @@ let AddedComplexity = 400 in { // Prefer
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
             (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
   }
-  def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
-            (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>;
 
   // Instructions for converting float to i64 feeding a store.
   let Predicates = [NoP9Vector] in {
@@ -3050,13 +3055,47 @@ let AddedComplexity = 400, Predicates =
             (STXVX $rS, xoaddr:$dst)>;
   def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
             (STXVX $rS, xoaddr:$dst)>;
-  def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
-            (v4i32 (LXVWSX xoaddr:$src))>;
-  def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
-            (v4f32 (LXVWSX xoaddr:$src))>;
-  def : Pat<(v4f32 (scalar_to_vector
-                     (f32 (fpround (f64 (extloadf32 xoaddr:$src)))))),
-            (v4f32 (LXVWSX xoaddr:$src))>;
+
+  let AddedComplexity = 400 in {
+    // LIWAX - This instruction is used for sign extending i32 -> i64.
+    // LIWZX - This instruction will be emitted for i32, f32, and when
+    //         zero-extending i32 to i64 (zext i32 -> i64).
+    let Predicates = [IsLittleEndian] in {
+
+      def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
+                (v2i64 (XXPERMDIs
+                (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>;
+
+      def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
+                (v2i64 (XXPERMDIs
+                (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
+
+      def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
+                (v4i32 (XXPERMDIs
+                (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
+
+      def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
+                (v4f32 (XXPERMDIs
+                (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
+    }
+
+    let Predicates = [IsBigEndian] in {
+      def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
+                (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
+
+      def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
+                (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
+
+      def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
+                (v4i32 (XXSLDWIs
+                (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+
+      def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
+                (v4f32 (XXSLDWIs
+                (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+    }
+
+  }
 
   // Build vectors from i8 loads
   def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
@@ -3218,6 +3257,39 @@ let AddedComplexity = 400, Predicates =
   def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
             (f32 (DFLOADf32 ixaddr:$src))>;
 
+
+  let AddedComplexity = 400 in {
+  // The following pseudoinstructions are used to ensure the utilization
+  // of all 64 VSX registers.
+    let Predicates = [IsLittleEndian, HasP9Vector] in {
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
+                (v2i64 (XXPERMDIs
+                (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
+                (v2i64 (XXPERMDIs
+		(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
+
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
+                (v2f64 (XXPERMDIs
+                (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
+                (v2f64 (XXPERMDIs
+                (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
+    }
+
+    let Predicates = [IsBigEndian, HasP9Vector] in {
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
+                (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
+                (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
+
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
+                (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
+                (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
+    }
+  }
+
   let Predicates = [IsBigEndian, HasP9Vector] in {
 
     // (Un)Signed DWord vector extract -> QP
@@ -3932,3 +4004,4 @@ let AddedComplexity = 400 in {
               (v4i32 (VEXTSH2W $A))>;
   }
 }
+

Modified: llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll Wed Aug  8 08:20:43 2018
@@ -1,35 +1,46 @@
 ; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
-; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P8
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   | FileCheck %s --check-prefix=CHECK-P8
 ; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
-; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   | FileCheck %s --check-prefix=CHECK-P9
 
 @a = external local_unnamed_addr global <4 x i32>, align 16
 @pb = external local_unnamed_addr global float*, align 8
 
 define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
-; CHECK-P8-LABEL:     testExpandPostRAPseudo:
-; CHECK-P8:           lxsiwax 34, 0, 3
-; CHECK-P8-NEXT:      xxspltw 34, 34, 1
-; CHECK-P8-NEXT:      stvx 2, 0, 4
-; CHECK-P8:           #APP
-; CHECK-P8-NEXT:      #Clobber Rigisters
-; CHECK-P8-NEXT:      #NO_APP
-; CHECK-P8-NEXT:      lis 4, 1024
-; CHECK-P8-NEXT:      lfiwax 0, 0, 3
-; CHECK-P8:           stfsx 0, 3, 4
-; CHECK-P8-NEXT:      blr
-
-; CHECK-P9-LABEL:     testExpandPostRAPseudo:
-; CHECK-P9:           lxvwsx 0, 0, 3
-; CHECK-P9:           stxvx 0, 0, 4
-; CHECK-P9:           #APP
-; CHECK-P9-NEXT:      #Clobber Rigisters
-; CHECK-P9-NEXT:      #NO_APP
-; CHECK-P9-NEXT:      lis 4, 1024
-; CHECK-P9-NEXT:      lfiwax 0, 0, 3
-; CHECK-P9:           stfsx 0, 3, 4
-; CHECK-P9-NEXT:      blr
-
+; CHECK-P8-LABEL: testExpandPostRAPseudo:
+; CHECK-P8:  # %bb.0: # %entry
+; CHECK-P8:    lfiwzx f0, 0, r3
+; CHECK-P8:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8:    xxpermdi vs0, f0, f0, 2
+; CHECK-P8:    xxspltw v2, vs0, 3
+; CHECK-P8:    stvx v2, 0, r4
+; CHECK-P8:    lis r4, 1024
+; CHECK-P8:    lfiwax f0, 0, r3
+; CHECK-P8:    addis r3, r2, .LC1@toc@ha
+; CHECK-P8:    ld r3, .LC1@toc@l(r3)
+; CHECK-P8:    xscvsxdsp f0, f0
+; CHECK-P8:    ld r3, 0(r3)
+; CHECK-P8:    stfsx f0, r3, r4
+; CHECK-P8:    blr
+;
+; CHECK-P9-LABEL: testExpandPostRAPseudo:
+; CHECK-P9:  # %bb.0: # %entry
+; CHECK-P9:    lfiwzx f0, 0, r3
+; CHECK-P9:    addis r4, r2, .LC0@toc@ha
+; CHECK-P9:    ld r4, .LC0@toc@l(r4)
+; CHECK-P9:    xxpermdi vs0, f0, f0, 2
+; CHECK-P9:    xxspltw vs0, vs0, 3
+; CHECK-P9:    stxvx vs0, 0, r4
+; CHECK-P9:    lis r4, 1024
+; CHECK-P9:    lfiwax f0, 0, r3
+; CHECK-P9:    addis r3, r2, .LC1@toc@ha
+; CHECK-P9:    ld r3, .LC1@toc@l(r3)
+; CHECK-P9:    xscvsxdsp f0, f0
+; CHECK-P9:    ld r3, 0(r3)
+; CHECK-P9:    stfsx f0, r3, r4
+; CHECK-P9:    blr
 entry:
   %0 = load i32, i32* %ptr, align 4
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0

Modified: llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll Wed Aug  8 08:20:43 2018
@@ -109,8 +109,8 @@
 ;vector int spltRegVali(int val) {                                            //
 ;  return (vector int) val;                                                   //
 ;}                                                                            //
-;// P8: lxsiwax, xxspltw                                                      //
-;// P9: lxvwsx                                                                //
+;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw         //
+;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw         //
 ;vector int spltMemVali(int *ptr) {                                           //
 ;  return (vector int)*ptr;                                                   //
 ;}                                                                            //
@@ -284,8 +284,8 @@
 ;vector unsigned int spltRegValui(unsigned int val) {                         //
 ;  return (vector unsigned int) val;                                          //
 ;}                                                                            //
-;// P8: lxsiwax, xxspltw                                                      //
-;// P9: lxvwsx                                                                //
+;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw         //
+;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw         //
 ;vector unsigned int spltMemValui(unsigned int *ptr) {                        //
 ;  return (vector unsigned int)*ptr;                                          //
 ;}                                                                            //
@@ -1202,15 +1202,21 @@ entry:
 ; P9LE-LABEL: spltMemVali
 ; P8BE-LABEL: spltMemVali
 ; P8LE-LABEL: spltMemVali
-; P9BE: lxvwsx v2, 0, r3
-; P9BE: blr
-; P9LE: lxvwsx v2, 0, r3
-; P9LE: blr
-; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
-; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
-; P8BE: blr
-; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
-; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P9BE: lfiwzx f0, 0, r3
+; P9BE: xxsldwi vs0, f0, f0, 1
+; P9BE: xxspltw v2, vs0, 0
+; P9BE: blr
+; P9LE: lfiwzx f0, 0, r3
+; P9LE: xxpermdi vs0, f0, f0, 2
+; P9LE: xxspltw v2, vs0, 3
+; P9LE: blr
+; P8BE: lfiwzx f0, 0, r3
+; P8BE: xxsldwi vs0, f0, f0, 1
+; P8BE: xxspltw v2, vs0, 0
+; P8BE: blr
+; P8LE: lfiwzx f0, 0, r3
+; P8LE: xxpermdi vs0, f0, f0, 2
+; P8LE: xxspltw v2, vs0, 3
 ; P8LE: blr
 }
 
@@ -2338,15 +2344,21 @@ entry:
 ; P9LE-LABEL: spltMemValui
 ; P8BE-LABEL: spltMemValui
 ; P8LE-LABEL: spltMemValui
-; P9BE: lxvwsx v2, 0, r3
-; P9BE: blr
-; P9LE: lxvwsx v2, 0, r3
-; P9LE: blr
-; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
-; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
-; P8BE: blr
-; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
-; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P9BE: lfiwzx f0, 0, r3
+; P9BE: xxsldwi vs0, f0, f0, 1
+; P9BE: xxspltw v2, vs0, 0
+; P9BE: blr
+; P9LE: lfiwzx f0, 0, r3
+; P9LE: xxpermdi vs0, f0, f0, 2
+; P9LE: xxspltw v2, vs0, 3
+; P9LE: blr
+; P8BE: lfiwzx f0, 0, r3
+; P8BE: xxsldwi vs0, f0, f0, 1
+; P8BE: xxspltw v2, vs0, 0
+; P8BE: blr
+; P8LE: lfiwzx f0, 0, r3
+; P8LE: xxpermdi vs0, f0, f0, 2
+; P8LE: xxspltw v2, vs0, 3
 ; P8LE: blr
 }
 

Modified: llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll Wed Aug  8 08:20:43 2018
@@ -1,15 +1,27 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck --check-prefix=CHECK-LE \
 ; RUN:   -implicit-check-not vmrg -implicit-check-not=vperm %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \
 ; RUN:   -implicit-check-not vmrg -implicit-check-not=vperm %s
 
 define <16 x i8> @test(i32* %s, i32* %t) {
+; CHECK-LE-LABEL: test:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-LE-NEXT:    xxspltw v2, vs0, 3
+; CHECK-LE-NEXT:    blr
+
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfiwzx f0, 0, r3
+; CHECK-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-NEXT:    xxspltw v2, vs0, 0
+; CHECK-NEXT:    blr
 entry:
   %0 = bitcast i32* %s to <4 x i8>*
   %1 = load <4 x i8>, <4 x i8>* %0, align 4
   %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   ret <16 x i8> %2
-; CHECK-LABEL: test
-; CHECK: lxsiwax 34, 0, 3
-; CHECK: xxspltw 34, 34, 1
 }

Modified: llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll Wed Aug  8 08:20:43 2018
@@ -1,47 +1,74 @@
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
-; RUN:   --check-prefix=CHECK-BE
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=CHECK-BE
 
 @Globi = external global i32, align 4
 @Globf = external global float, align 4
 
 define <2 x i64> @test1(i64 %a, i64 %b) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrdd v2, r3, r4
+; CHECK-BE-NEXT:    blr
 entry:
 ; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
 ; which will happen in a subsequent patch.
-; CHECK-LABEL: test1
-; CHECK: mtvsrdd 34, 4, 3
-; CHECK-BE-LABEL: test1
-; CHECK-BE: mtvsrdd 34, 3, 4
   %vecins = insertelement <2 x i64> undef, i64 %a, i32 0
   %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
   ret <2 x i64> %vecins1
 }
 
 define i64 @test2(<2 x i64> %a) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrld r3, v2
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test2
-; CHECK: mfvsrld 3, 34
   %0 = extractelement <2 x i64> %a, i32 0
   ret i64 %0
 }
 
 define i64 @test3(<2 x i64> %a) {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test3:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mfvsrld r3, v2
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-BE-LABEL: test3
-; CHECK-BE: mfvsrld 3, 34
   %0 = extractelement <2 x i64> %a, i32 1
   ret i64 %0
 }
 
 define <4 x i32> @test4(i32* nocapture readonly %in) {
+; CHECK-LABEL: test4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfiwzx f0, 0, r3
+; CHECK-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT:    xxspltw v2, vs0, 3
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test4:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lfiwzx f0, 0, r3
+; CHECK-BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-NEXT:    xxspltw v2, vs0, 0
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test4
-; CHECK: lxvwsx 34, 0, 3
-; CHECK-NOT: xxspltw
-; CHECK-BE-LABEL: test4
-; CHECK-BE: lxvwsx 34, 0, 3
-; CHECK-BE-NOT: xxspltw
   %0 = load i32, i32* %in, align 4
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -49,13 +76,20 @@ entry:
 }
 
 define <4 x float> @test5(float* nocapture readonly %in) {
+; CHECK-LABEL: test5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfiwzx f0, 0, r3
+; CHECK-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT:    xxspltw v2, vs0, 3
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test5:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lfiwzx f0, 0, r3
+; CHECK-BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-NEXT:    xxspltw v2, vs0, 0
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test5
-; CHECK: lxvwsx 34, 0, 3
-; CHECK-NOT: xxspltw
-; CHECK-BE-LABEL: test5
-; CHECK-BE: lxvwsx 34, 0, 3
-; CHECK-BE-NOT: xxspltw
   %0 = load float, float* %in, align 4
   %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
   %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
@@ -63,17 +97,24 @@ entry:
 }
 
 define <4 x i32> @test6() {
+; CHECK-LABEL: test6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    lfiwzx f0, 0, r3
+; CHECK-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT:    xxspltw v2, vs0, 3
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test6:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-BE-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-BE-NEXT:    lfiwzx f0, 0, r3
+; CHECK-BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-NEXT:    xxspltw v2, vs0, 0
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test6
-; CHECK: addis
-; CHECK: ld [[TOC:[0-9]+]], .LC0
-; CHECK: lxvwsx 34, 0, 3
-; CHECK-NOT: xxspltw
-; CHECK-BE-LABEL: test6
-; CHECK-BE: addis
-; CHECK-BE: ld [[TOC:[0-9]+]], .LC0
-; CHECK-BE: lxvwsx 34, 0, 3
-; CHECK-BE-NOT: xxspltw
   %0 = load i32, i32* @Globi, align 4
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -81,17 +122,24 @@ entry:
 }
 
 define <4 x float> @test7() {
+; CHECK-LABEL: test7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-NEXT:    ld r3, .LC1@toc@l(r3)
+; CHECK-NEXT:    lfiwzx f0, 0, r3
+; CHECK-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT:    xxspltw v2, vs0, 3
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test7:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-BE-NEXT:    ld r3, .LC1@toc@l(r3)
+; CHECK-BE-NEXT:    lfiwzx f0, 0, r3
+; CHECK-BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-NEXT:    xxspltw v2, vs0, 0
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test7
-; CHECK: addis
-; CHECK: ld [[TOC:[0-9]+]], .LC1
-; CHECK: lxvwsx 34, 0, 3
-; CHECK-NOT: xxspltw
-; CHECK-BE-LABEL: test7
-; CHECK-BE: addis
-; CHECK-BE: ld [[TOC:[0-9]+]], .LC1
-; CHECK-BE: lxvwsx 34, 0, 3
-; CHECK-BE-NOT: xxspltw
   %0 = load float, float* @Globf, align 4
   %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
   %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
@@ -99,76 +147,120 @@ entry:
 }
 
 define <16 x i8> @test8() {
+; CHECK-LABEL: test8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlxor v2, v2, v2
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test8:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxlxor v2, v2, v2
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test8
-; CHECK: xxlxor 34, 34, 34
-; CHECK-BE-LABEL: test8
-; CHECK-BE: xxlxor 34, 34, 34
   ret <16 x i8> zeroinitializer
 }
 
 define <16 x i8> @test9() {
+; CHECK-LABEL: test9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltib v2, 1
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test9:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxspltib v2, 1
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test9
-; CHECK: xxspltib 34, 1
-; CHECK-BE-LABEL: test9
-; CHECK-BE: xxspltib 34, 1
   ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
 }
 
 define <16 x i8> @test10() {
+; CHECK-LABEL: test10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltib v2, 127
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test10:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxspltib v2, 127
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test10
-; CHECK: xxspltib 34, 127
-; CHECK-BE-LABEL: test10
-; CHECK-BE: xxspltib 34, 127
   ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
 }
 
 define <16 x i8> @test11() {
+; CHECK-LABEL: test11:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltib v2, 128
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test11:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxspltib v2, 128
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test11
-; CHECK: xxspltib 34, 128
-; CHECK-BE-LABEL: test11
-; CHECK-BE: xxspltib 34, 128
   ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
 }
 
 define <16 x i8> @test12() {
+; CHECK-LABEL: test12:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltib v2, 255
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test12:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxspltib v2, 255
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test12
-; CHECK: xxspltib 34, 255
-; CHECK-BE-LABEL: test12
-; CHECK-BE: xxspltib 34, 255
   ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 }
 
 define <16 x i8> @test13() {
+; CHECK-LABEL: test13:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltib v2, 129
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test13:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxspltib v2, 129
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test13
-; CHECK: xxspltib 34, 129
-; CHECK-BE-LABEL: test13
-; CHECK-BE: xxspltib 34, 129
   ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
 }
 
 define <16 x i8> @test13E127() {
+; CHECK-LABEL: test13E127:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltib v2, 200
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test13E127:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxspltib v2, 200
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test13E127
-; CHECK: xxspltib 34, 200
-; CHECK-BE-LABEL: test13E127
-; CHECK-BE: xxspltib 34, 200
   ret <16 x i8> <i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200>
 }
 
 define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
+; CHECK-LABEL: test14:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lwz r3, 0(r5)
+; CHECK-NEXT:    mtvsrws v2, r3
+; CHECK-NEXT:    addi r3, r3, 5
+; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: test14:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lwz r3, 0(r5)
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    addi r3, r3, 5
+; CHECK-BE-NEXT:    stw r3, 0(r5)
+; CHECK-BE-NEXT:    blr
 entry:
-; CHECK-LABEL: test14
-; CHECK: lwz [[LD:[0-9]+]],
-; CHECK: mtvsrws 34, [[LD]]
-; CHECK-BE-LABEL: test14
-; CHECK-BE: lwz [[LD:[0-9]+]],
-; CHECK-BE: mtvsrws 34, [[LD]]
   %0 = load i32, i32* %b, align 4
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer

Modified: llvm/trunk/test/CodeGen/PowerPC/qpx-load-splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/qpx-load-splat.ll?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/qpx-load-splat.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/qpx-load-splat.ll Wed Aug  8 08:20:43 2018
@@ -1,35 +1,44 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
-target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-bgq-linux"
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x double> @foo(double* nocapture readonly %a) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvdsx v2, 0, r3
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    blr
 entry:
   %0 = load double, double* %a, align 8
   %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
   %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
   ret <4 x double> %shuffle.i
-
-; CHECK-LABEL: @foo
-; CHECK: lfd 1, 0(3)
-; CHECK: blr
 }
 
 define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {
+; CHECK-LABEL: foox:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r4, r4, 3
+; CHECK-NEXT:    lxvdsx v2, r3, r4
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    blr
 entry:
   %p = getelementptr double, double* %a, i64 %idx
   %0 = load double, double* %p, align 8
   %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
   %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
   ret <4 x double> %shuffle.i
-
-; CHECK-LABEL: @foox
-; CHECK: sldi [[REG1:[0-9]+]], 4, 3
-; CHECK: lfdx 1, 3, [[REG1]]
-; CHECK: blr
 }
 
 define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
+; CHECK-LABEL: fooxu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r4, r4, 3
+; CHECK-NEXT:    lfdux f0, r3, r4
+; CHECK-NEXT:    xxspltd v2, vs0, 0
+; CHECK-NEXT:    std r3, 0(r5)
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    blr
 entry:
   %p = getelementptr double, double* %a, i64 %idx
   %0 = load double, double* %p, align 8
@@ -37,39 +46,36 @@ entry:
   %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
   store double* %p, double** %pptr, align 8
   ret <4 x double> %shuffle.i
-
-; CHECK-LABEL: @foox
-; CHECK: sldi [[REG1:[0-9]+]], 4, 3
-; CHECK: lfdux 1, 3, [[REG1]]
-; CHECK: std 3, 0(5)
-; CHECK: blr
 }
 
 define <4 x float> @foof(float* nocapture readonly %a) #0 {
+; CHECK-LABEL: foof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfiwzx f0, 0, r3
+; CHECK-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT:    xxspltw v2, vs0, 3
+; CHECK-NEXT:    blr
 entry:
   %0 = load float, float* %a, align 4
   %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
   %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   ret <4 x float> %shuffle.i
-
-; CHECK-LABEL: @foof
-; CHECK: lfs 1, 0(3)
-; CHECK: blr
 }
 
 define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
+; CHECK-LABEL: foofx:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r4, r4, 2
+; CHECK-NEXT:    lfiwzx f0, r3, r4
+; CHECK-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT:    xxspltw v2, vs0, 3
+; CHECK-NEXT:    blr
 entry:
   %p = getelementptr float, float* %a, i64 %idx
   %0 = load float, float* %p, align 4
   %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
   %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   ret <4 x float> %shuffle.i
-
-; CHECK-LABEL: @foofx
-; CHECK: sldi [[REG1:[0-9]+]], 4, 2
-; CHECK: lfsx 1, 3, [[REG1]]
-; CHECK: blr
 }
 
-attributes #0 = { norecurse nounwind readonly "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }
 

Added: llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll?rev=339260&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll Wed Aug  8 08:20:43 2018
@@ -0,0 +1,292 @@
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) {
+; P9LE-LABEL: s2v_test1:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 0(r3)
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test1:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 0(r3)
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+entry:
+  %0 = load i64, i64* %int64, align 8
+  %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec)  {
+; P9LE-LABEL: s2v_test2:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 8(r3)
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test2:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 8(r3)
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1
+  %0 = load i64, i64* %arrayidx, align 8
+  %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32 signext %Idx)  {
+; P9LE-LABEL: s2v_test3:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r7, 3
+; P9LE-NEXT:    lfdx f0, r3, r4
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test3:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r7, 3
+; P9BE-NEXT:    lfdx f0, r3, r4
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+entry:
+  %idxprom = sext i32 %Idx to i64
+  %arrayidx = getelementptr inbounds i64, i64* %int64, i64 %idxprom
+  %0 = load i64, i64* %arrayidx, align 8
+  %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec)  {
+; P9LE-LABEL: s2v_test4:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 8(r3)
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test4:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 8(r3)
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1
+  %0 = load i64, i64* %arrayidx, align 8
+  %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1)  {
+; P9LE-LABEL: s2v_test5:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 0(r5)
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test5:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 0(r5)
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+entry:
+  %0 = load i64, i64* %ptr1, align 8
+  %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %vec)  {
+; P9LE-LABEL: s2v_test_f1:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 0(r3)
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f1:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 0(r3)
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test_f1:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfdx f0, 0, r3
+; P8LE-NEXT:    xxspltd vs0, vs0, 0
+; P8LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f1:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfdx f0, 0, r3
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %0 = load double, double* %f64, align 8
+  %vecins = insertelement <2 x double> %vec, double %0, i32 0
+  ret <2 x double> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %vec)  {
+; P9LE-LABEL: s2v_test_f2:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 8(r3)
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f2:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 8(r3)
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test_f2:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 8
+; P8LE-NEXT:    lfdx f0, 0, r3
+; P8LE-NEXT:    xxspltd vs0, vs0, 0
+; P8LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f2:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 8
+; P8BE-NEXT:    lfdx f0, 0, r3
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds double, double* %f64, i64 1
+  %0 = load double, double* %arrayidx, align 8
+  %vecins = insertelement <2 x double> %vec, double %0, i32 0
+  ret <2 x double> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %vec, i32 signext %Idx)  {
+; P9LE-LABEL: s2v_test_f3:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r7, 3
+; P9LE-NEXT:    lfdx f0, r3, r4
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f3:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r7, 3
+; P9BE-NEXT:    lfdx f0, r3, r4
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test_f3:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r7, 3
+; P8LE-NEXT:    lfdx f0, r3, r4
+; P8LE-NEXT:    xxspltd vs0, vs0, 0
+; P8LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f3:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r7, 3
+; P8BE-NEXT:    lfdx f0, r3, r4
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %idxprom = sext i32 %Idx to i64
+  %arrayidx = getelementptr inbounds double, double* %f64, i64 %idxprom
+  %0 = load double, double* %arrayidx, align 8
+  %vecins = insertelement <2 x double> %vec, double %0, i32 0
+  ret <2 x double> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %vec)  {
+; P9LE-LABEL: s2v_test_f4:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 8(r3)
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f4:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 8(r3)
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test_f4:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 8
+; P8LE-NEXT:    lfdx f0, 0, r3
+; P8LE-NEXT:    xxspltd vs0, vs0, 0
+; P8LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f4:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 8
+; P8BE-NEXT:    lfdx f0, 0, r3
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds double, double* %f64, i64 1
+  %0 = load double, double* %arrayidx, align 8
+  %vecins = insertelement <2 x double> %vec, double %0, i32 0
+  ret <2 x double> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %ptr1)  {
+; P9LE-LABEL: s2v_test_f5:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 0(r5)
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f5:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 0(r5)
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test_f5:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfdx f0, 0, r5
+; P8LE-NEXT:    xxspltd vs0, vs0, 0
+; P8LE-NEXT:    xxpermdi v2, v2, vs0, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f5:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfdx f0, 0, r5
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %0 = load double, double* %ptr1, align 8
+  %vecins = insertelement <2 x double> %vec, double %0, i32 0
+  ret <2 x double> %vecins
+}
+

Added: llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll?rev=339260&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll Wed Aug  8 08:20:43 2018
@@ -0,0 +1,118 @@
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
+; P9LE-LABEL: test_liwzx1:
+; P9LE:       # %bb.0:
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    lfiwzx f1, 0, r4
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxpermdi vs1, f1, f1, 2
+; P9LE-NEXT:    xvaddsp vs0, vs0, vs1
+; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P9LE-NEXT:    xscvspdpn f0, vs0
+; P9LE-NEXT:    stfs f0, 0(r5)
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: test_liwzx1:
+; P9BE:       # %bb.0:
+; P9BE-NEXT:    lfiwzx f0, 0, r3
+; P9BE-NEXT:    lfiwzx f1, 0, r4
+; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P9BE-NEXT:    xxsldwi vs1, f1, f1, 1
+; P9BE-NEXT:    xvaddsp vs0, vs0, vs1
+; P9BE-NEXT:    xscvspdpn f0, vs0
+; P9BE-NEXT:    stfs f0, 0(r5)
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: test_liwzx1:
+; P8LE:       # %bb.0:
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    lfiwzx f1, 0, r4
+; P8LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P8LE-NEXT:    xxpermdi vs1, f1, f1, 2
+; P8LE-NEXT:    xvaddsp vs0, vs0, vs1
+; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P8LE-NEXT:    xscvspdpn f0, vs0
+; P8LE-NEXT:    stfsx f0, 0, r5
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: test_liwzx1:
+; P8BE:       # %bb.0:
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    lfiwzx f1, 0, r4
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE-NEXT:    xxsldwi vs1, f1, f1, 1
+; P8BE-NEXT:    xvaddsp vs0, vs0, vs1
+; P8BE-NEXT:    xscvspdpn f0, vs0
+; P8BE-NEXT:    stfsx f0, 0, r5
+; P8BE-NEXT:    blr
+  %a = load <1 x float>, <1 x float>* %A
+  %b = load <1 x float>, <1 x float>* %B
+  %X = fadd <1 x float> %a, %b
+  store <1 x float> %X, <1 x float>* %C
+  ret void
+}
+
+define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
+; P9LE-LABEL: test_liwzx2:
+; P9LE:       # %bb.0:
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    lfiwzx f1, 0, r4
+; P9LE-NEXT:    mr r3, r5
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxpermdi vs1, f1, f1, 2
+; P9LE-NEXT:    xvsubsp vs0, vs0, vs1
+; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P9LE-NEXT:    xscvspdpn f0, vs0
+; P9LE-NEXT:    stfs f0, 0(r5)
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: test_liwzx2:
+; P9BE:       # %bb.0:
+; P9BE-NEXT:    lfiwzx f0, 0, r3
+; P9BE-NEXT:    lfiwzx f1, 0, r4
+; P9BE-NEXT:    mr r3, r5
+; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P9BE-NEXT:    xxsldwi vs1, f1, f1, 1
+; P9BE-NEXT:    xvsubsp vs0, vs0, vs1
+; P9BE-NEXT:    xscvspdpn f0, vs0
+; P9BE-NEXT:    stfs f0, 0(r5)
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: test_liwzx2:
+; P8LE:       # %bb.0:
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    lfiwzx f1, 0, r4
+; P8LE-NEXT:    mr r3, r5
+; P8LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P8LE-NEXT:    xxpermdi vs1, f1, f1, 2
+; P8LE-NEXT:    xvsubsp vs0, vs0, vs1
+; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P8LE-NEXT:    xscvspdpn f0, vs0
+; P8LE-NEXT:    stfsx f0, 0, r5
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: test_liwzx2:
+; P8BE:       # %bb.0:
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    lfiwzx f1, 0, r4
+; P8BE-NEXT:    mr r3, r5
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE-NEXT:    xxsldwi vs1, f1, f1, 1
+; P8BE-NEXT:    xvsubsp vs0, vs0, vs1
+; P8BE-NEXT:    xscvspdpn f0, vs0
+; P8BE-NEXT:    stfsx f0, 0, r5
+; P8BE-NEXT:    blr
+  %a = load <1 x float>, <1 x float>* %A
+  %b = load <1 x float>, <1 x float>* %B
+  %X = fsub <1 x float> %a, %b
+  store <1 x float> %X, <1 x float>* %C
+  ret <1 x float>* %C
+}

Added: llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_3.ll?rev=339260&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_3.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_3.ll Wed Aug  8 08:20:43 2018
@@ -0,0 +1,265 @@
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec)  {
+; P9LE-LABEL: s2v_test1:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfiwax f0, 0, r3
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test1:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfiwax f0, 0, r3
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test1:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwax f0, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test1:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfiwax f0, 0, r3
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %0 = load i32, i32* %int32, align 4
+  %conv = sext i32 %0 to i64
+  %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec)  {
+; P9LE-LABEL: s2v_test2:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addi r3, r3, 4
+; P9LE-NEXT:    lfiwax f0, 0, r3
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test2:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addi r3, r3, 4
+; P9BE-NEXT:    lfiwax f0, 0, r3
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test2:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 4
+; P8LE-NEXT:    lfiwax f0, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test2:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 4
+; P8BE-NEXT:    lfiwax f0, 0, r3
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
+  %0 = load i32, i32* %arrayidx, align 4
+  %conv = sext i32 %0 to i64
+  %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx)  {
+; P9LE-LABEL: s2v_test3:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r7, 2
+; P9LE-NEXT:    lfiwax f0, r3, r4
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test3:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r7, 2
+; P9BE-NEXT:    lfiwax f0, r3, r4
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test3:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r7, 2
+; P8LE-NEXT:    lfiwax f0, r3, r4
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test3:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r7, 2
+; P8BE-NEXT:    lfiwax f0, r3, r4
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %idxprom = sext i32 %Idx to i64
+  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %conv = sext i32 %0 to i64
+  %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec)  {
+; P9LE-LABEL: s2v_test4:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addi r3, r3, 4
+; P9LE-NEXT:    lfiwax f0, 0, r3
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test4:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addi r3, r3, 4
+; P9BE-NEXT:    lfiwax f0, 0, r3
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test4:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 4
+; P8LE-NEXT:    lfiwax f0, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test4:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 4
+; P8BE-NEXT:    lfiwax f0, 0, r3
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
+  %0 = load i32, i32* %arrayidx, align 4
+  %conv = sext i32 %0 to i64
+  %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1)  {
+; P9LE-LABEL: s2v_test5:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfiwax f0, 0, r5
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test5:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfiwax f0, 0, r5
+; P9BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test5:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwax f0, 0, r5
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    xxpermdi v2, v2, v3, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test5:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfiwax f0, 0, r5
+; P8BE-NEXT:    xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT:    blr
+entry:
+  %0 = load i32, i32* %ptr1, align 4
+  %conv = sext i32 %0 to i64
+  %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+  ret <2 x i64> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr)  {
+; P9LE-LABEL: s2v_test6:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfiwax f0, 0, r3
+; P9LE-NEXT:    xxpermdi v2, f0, f0, 2
+; P9LE-NEXT:    xxspltd v2, v2, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test6:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfiwax f0, 0, r3
+; P9BE-NEXT:    xxspltd v2, vs0, 0
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test6:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwax f0, 0, r3
+; P8LE-NEXT:    xxpermdi v2, f0, f0, 2
+; P8LE-NEXT:    xxspltd v2, v2, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test6:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfiwax f0, 0, r3
+; P8BE-NEXT:    xxspltd v2, vs0, 0
+; P8BE-NEXT:    blr
+entry:
+  %0 = load i32, i32* %ptr, align 4
+  %conv = sext i32 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr)  {
+; P9LE-LABEL: s2v_test7:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfiwax f0, 0, r3
+; P9LE-NEXT:    xxpermdi v2, f0, f0, 2
+; P9LE-NEXT:    xxspltd v2, v2, 1
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test7:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfiwax f0, 0, r3
+; P9BE-NEXT:    xxspltd v2, vs0, 0
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test7:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwax f0, 0, r3
+; P8LE-NEXT:    xxpermdi v2, f0, f0, 2
+; P8LE-NEXT:    xxspltd v2, v2, 1
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test7:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfiwax f0, 0, r3
+; P8BE-NEXT:    xxspltd v2, vs0, 0
+; P8BE-NEXT:    blr
+entry:
+  %0 = load i32, i32* %ptr, align 4
+  %conv = sext i32 %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+}
+

Added: llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll?rev=339260&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll Wed Aug  8 08:20:43 2018
@@ -0,0 +1,341 @@
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec)  {
+; P8LE-LABEL: s2v_test1:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
+; P8LE-NEXT:    addi r3, r4, .LCPI0_0@toc@l
+; P8LE-NEXT:    lvx v4, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vperm v2, v3, v2, v4
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test1:
+; P8BE:       # %bb.0: # %entry
+; P8BE:         lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE:         xxsldwi vs0, v2, vs0, 1
+; P8BE:         xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    blr
+entry:
+  %0 = load i32, i32* %int32, align 4
+  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
+  ret <4 x i32> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec)  {
+; P8LE-LABEL: s2v_test2:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 4
+; P8LE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    addi r3, r4, .LCPI1_0@toc@l
+; P8LE-NEXT:    lvx v4, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vperm v2, v3, v2, v4
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test2:
+; P8BE:       # %bb.0: # %entry
+; P8BE:         addi r3, r3, 4
+; P8BE:         lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE:         xxsldwi vs0, v2, vs0, 1
+; P8BE:         xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
+  %0 = load i32, i32* %arrayidx, align 4
+  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
+  ret <4 x i32> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx)  {
+; P8LE-LABEL: s2v_test3:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r5, r7, 2
+; P8LE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
+; P8LE-NEXT:    lfiwzx f0, r3, r5
+; P8LE-NEXT:    addi r3, r4, .LCPI2_0@toc@l
+; P8LE-NEXT:    lvx v4, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vperm v2, v3, v2, v4
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test3:
+; P8BE:       # %bb.0: # %entry
+; P8BE:         sldi r4, r7, 2
+; P8BE:         lfiwzx f0, r3, r4
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE:         xxsldwi vs0, v2, vs0, 1
+; P8BE:         xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    blr
+entry:
+  %idxprom = sext i32 %Idx to i64
+  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
+  ret <4 x i32> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec)  {
+; P8LE-LABEL: s2v_test4:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 4
+; P8LE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    addi r3, r4, .LCPI3_0@toc@l
+; P8LE-NEXT:    lvx v4, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vperm v2, v3, v2, v4
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test4:
+; P8BE:       # %bb.0: # %entry
+; P8BE:         addi r3, r3, 4
+; P8BE:         lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE:         xxsldwi vs0, v2, vs0, 1
+; P8BE:         xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
+  %0 = load i32, i32* %arrayidx, align 4
+  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
+  ret <4 x i32> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1)  {
+; P8LE-LABEL: s2v_test5:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwzx f0, 0, r5
+; P8LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; P8LE-NEXT:    lvx v4, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vperm v2, v3, v2, v4
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test5:
+; P8BE:       # %bb.0: # %entry
+; P8BE:         lfiwzx f0, 0, r5
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE:         xxsldwi vs0, v2, vs0, 1
+; P8BE:         xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    blr
+entry:
+  %0 = load i32, i32* %ptr1, align 4
+  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
+  ret <4 x i32> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec)  {
+; P8LE-LABEL: s2v_test_f1:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
+; P8LE-NEXT:    addi r3, r4, .LCPI5_0@toc@l
+; P8LE-NEXT:    lvx v4, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vperm v2, v3, v2, v4
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f1:
+; P8BE:       # %bb.0: # %entry
+; P8BE:         lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE:         xxsldwi vs0, v2, vs0, 1
+; P8BE:         xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    blr
+entry:
+  %0 = load float, float* %f64, align 4
+  %vecins = insertelement <4 x float> %vec, float %0, i32 0
+  ret <4 x float> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec)  {
+; P9LE-LABEL: s2v_test_f2:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addi r3, r3, 4
+; P9LE-NEXT:    xxspltw v2, v2, 2
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f2:
+; P9BE:       # %bb.0: # %entry
+; P9BE:       addi r3, r3, 4
+; P9BE:       xxspltw v2, v2, 1
+; P9BE:       lfiwzx f0, 0, r3
+; P9BE-NEXT:  xxsldwi v3, f0, f0, 1
+; P9BE:       vmrghw v2, v3, v2
+; P9BE-NEXT:  blr
+
+; P8LE-LABEL: s2v_test_f2:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 4
+; P8LE-NEXT:    xxspltw v2, v2, 2
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vmrglw v2, v2, v3
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f2:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 4
+; P8BE-NEXT:    xxspltw v2, v2, 1
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
+  %0 = load float, float* %arrayidx, align 8
+  %vecins = insertelement <2 x float> %vec, float %0, i32 0
+  ret <2 x float> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx)  {
+; P9LE-LABEL: s2v_test_f3:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r7, 2
+; P9LE-NEXT:    xxspltw v2, v2, 2
+; P9LE-NEXT:    lfiwzx f0, r3, r4
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f3:
+; P9BE:       # %bb.0: # %entry
+; P9BE:         sldi r4, r7, 2
+; P9BE:         xxspltw v2, v2, 1
+; P9BE:         lfiwzx f0, r3, r4
+; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P9BE:         vmrghw v2, v3, v2
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test_f3:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r7, 2
+; P8LE-NEXT:    xxspltw v2, v2, 2
+; P8LE-NEXT:    lfiwzx f0, r3, r4
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vmrglw v2, v2, v3
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f3:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r7, 2
+; P8BE-NEXT:    xxspltw v2, v2, 1
+; P8BE-NEXT:    lfiwzx f0, r3, r4
+; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    blr
+entry:
+  %idxprom = sext i32 %Idx to i64
+  %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 8
+  %vecins = insertelement <2 x float> %vec, float %0, i32 0
+  ret <2 x float> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec)  {
+; P9LE-LABEL: s2v_test_f4:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addi r3, r3, 4
+; P9LE-NEXT:    xxspltw v2, v2, 2
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f4:
+; P9BE:       # %bb.0: # %entry
+; P9BE:         addi r3, r3, 4
+; P9BE:         xxspltw v2, v2, 1
+; P9BE:         lfiwzx f0, 0, r3
+; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P9BE:         vmrghw v2, v3, v2
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test_f4:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 4
+; P8LE-NEXT:    xxspltw v2, v2, 2
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vmrglw v2, v2, v3
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f4:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 4
+; P8BE-NEXT:    xxspltw v2, v2, 1
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
+  %0 = load float, float* %arrayidx, align 8
+  %vecins = insertelement <2 x float> %vec, float %0, i32 0
+  ret <2 x float> %vecins
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1)  {
+; P9LE-LABEL: s2v_test_f5:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfiwzx f0, 0, r5
+; P9LE-NEXT:    xxspltw v2, v2, 2
+; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    blr
+
+; P9BE-LABEL: s2v_test_f5:
+; P9BE:       # %bb.0: # %entry
+; P9BE:         lfiwzx f0, 0, r5
+; P9BE:         xxspltw v2, v2, 1
+; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P9BE:         vmrghw v2, v3, v2
+; P9BE-NEXT:    blr
+
+; P8LE-LABEL: s2v_test_f5:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwzx f0, 0, r5
+; P8LE-NEXT:    xxspltw v2, v2, 2
+; P8LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P8LE-NEXT:    vmrglw v2, v2, v3
+; P8LE-NEXT:    blr
+
+; P8BE-LABEL: s2v_test_f5:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfiwzx f0, 0, r5
+; P8BE-NEXT:    xxspltw v2, v2, 1
+; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    blr
+entry:
+  %0 = load float, float* %ptr1, align 8
+  %vecins = insertelement <2 x float> %vec, float %0, i32 0
+  ret <2 x float> %vecins
+}
+

Modified: llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll Wed Aug  8 08:20:43 2018
@@ -1,12 +1,15 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 \
-; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -O3 < %s | FileCheck %s
 
 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-P9 \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   < %s | FileCheck %s --check-prefix=CHECK-P9 \
 ; RUN:   --implicit-check-not xxswapd
 
 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
-; RUN:   -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mattr=-power9-vector < %s | FileCheck %s
 
 ; These tests verify that VSX swap optimization works when loading a scalar
 ; into a vector register.
@@ -17,6 +20,31 @@
 @y = global double 1.780000e+00, align 8
 
 define void @bar0() {
+; CHECK-LABEL: bar0:
+; CHECK:   # %bb.0: # %entry
+; CHECK:     addis r3, r2, .LC0@toc@ha
+; CHECK:     addis r4, r2, .LC1@toc@ha
+; CHECK:     ld r3, .LC0@toc@l(r3)
+; CHECK:     addis r3, r2, .LC2@toc@ha
+; CHECK:     ld r3, .LC2@toc@l(r3)
+; CHECK:     xxpermdi vs0, vs0, vs1, 1
+; CHECK:     stxvd2x vs0, 0, r3
+; CHECK:     blr
+;
+; CHECK-P9-LABEL: bar0:
+; CHECK-P9:   # %bb.0: # %entry
+; CHECK-P9:     addis r3, r2, .LC0@toc@ha
+; CHECK-P9:     addis r4, r2, .LC1@toc@ha
+; CHECK-P9:     ld r3, .LC0@toc@l(r3)
+; CHECK-P9:     ld r4, .LC1@toc@l(r4)
+; CHECK-P9:     lfd f0, 0(r3)
+; CHECK-P9:     lxvx vs1, 0, r4
+; CHECK-P9:     addis r3, r2, .LC2@toc@ha
+; CHECK-P9:     ld r3, .LC2@toc@l(r3)
+; CHECK-P9:     xxpermdi vs0, f0, f0, 2
+; CHECK-P9:     xxpermdi vs0, vs1, vs0, 1
+; CHECK-P9:     stxvx vs0, 0, r3
+; CHECK-P9:     blr
 entry:
   %0 = load <2 x double>, <2 x double>* @x, align 16
   %1 = load double, double* @y, align 8
@@ -25,21 +53,32 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @bar0
-; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
-; CHECK-DAG: lfdx [[REG2:[0-9]+]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
-; CHECK: stxvd2x [[REG5]]
-
-; CHECK-P9-LABEL: @bar0
-; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
-; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
-; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
-; CHECK-P9: stxvx [[REG5]]
-
 define void @bar1() {
+; CHECK-LABEL: bar1:
+; CHECK:   # %bb.0: # %entry
+; CHECK:     addis r3, r2, .LC0@toc@ha
+; CHECK:     addis r4, r2, .LC1@toc@ha
+; CHECK:     ld r3, .LC0@toc@l(r3)
+; CHECK:     addis r3, r2, .LC2@toc@ha
+; CHECK:     ld r3, .LC2@toc@l(r3)
+; CHECK:     xxmrghd vs0, vs1, vs0
+; CHECK:     stxvd2x vs0, 0, r3
+; CHECK:     blr
+;
+; CHECK-P9-LABEL: bar1:
+; CHECK-P9:   # %bb.0: # %entry
+; CHECK-P9:     addis r3, r2, .LC0@toc@ha
+; CHECK-P9:     addis r4, r2, .LC1@toc@ha
+; CHECK-P9:     ld r3, .LC0@toc@l(r3)
+; CHECK-P9:     ld r4, .LC1@toc@l(r4)
+; CHECK-P9:     lfd f0, 0(r3)
+; CHECK-P9:     lxvx vs1, 0, r4
+; CHECK-P9:     addis r3, r2, .LC2@toc@ha
+; CHECK-P9:     ld r3, .LC2@toc@l(r3)
+; CHECK-P9:     xxpermdi vs0, f0, f0, 2
+; CHECK-P9:     xxmrgld vs0, vs0, vs1
+; CHECK-P9:     stxvx vs0, 0, r3
+; CHECK-P9:     blr
 entry:
   %0 = load <2 x double>, <2 x double>* @x, align 16
   %1 = load double, double* @y, align 8
@@ -48,17 +87,3 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @bar1
-; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
-; CHECK-DAG: lfdx [[REG2:[0-9]+]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
-; CHECK: stxvd2x [[REG5]]
-
-; CHECK-P9-LABEL: @bar1
-; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
-; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
-; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
-; CHECK-P9: stxvx [[REG5]]
-

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll?rev=339260&r1=339259&r2=339260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll Wed Aug  8 08:20:43 2018
@@ -1,74 +1,125 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
-; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN:   | FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN:   | FileCheck --check-prefix=CHECK-P9-VECTOR %s
 
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \
-; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
-
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \
 ; RUN:   --check-prefix=CHECK-P9 --implicit-check-not xxswapd
 
 define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
+; CHECK-LABEL: testi0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    lfdx f1, 0, r4
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    xxspltd vs1, vs1, 0
+; CHECK-NEXT:    xxpermdi v2, vs0, vs1, 1
+; CHECK-NEXT:    blr
+;
+; CHECK-P9-VECTOR-LABEL: testi0:
+; CHECK-P9-VECTOR:       # %bb.0:
+; CHECK-P9-VECTOR-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P9-VECTOR-NEXT:    lfdx f1, 0, r4
+; CHECK-P9-VECTOR-NEXT:    xxspltd vs1, vs1, 0
+; CHECK-P9-VECTOR-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-VECTOR-NEXT:    xxpermdi v2, vs0, vs1, 1
+; CHECK-P9-VECTOR-NEXT:    blr
+;
+; CHECK-P9-LABEL: testi0:
+; CHECK-P9:       # %bb.0:
+; CHECK-P9-NEXT:    lfd f0, 0(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-P9-NEXT:    xxpermdi v2, vs1, vs0, 1
+; CHECK-P9-NEXT:    blr
   %v = load <2 x double>, <2 x double>* %p1
   %s = load double, double* %p2
   %r = insertelement <2 x double> %v, double %s, i32 0
   ret <2 x double> %r
 
-; CHECK-LABEL: testi0
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: lfdx 1, 0, 4
-; CHECK-DAG: xxspltd 1, 1, 0
-; CHECK-DAG: xxswapd 0, 0
-; CHECK: xxpermdi 34, 0, 1, 1
-
-; CHECK-P9-LABEL: testi0
-; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
-; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3)
-; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
-; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1
+
 }
 
 define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
+; CHECK-LABEL: testi1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    lfdx f1, 0, r4
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    xxspltd vs1, vs1, 0
+; CHECK-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-NEXT:    blr
+;
+; CHECK-P9-VECTOR-LABEL: testi1:
+; CHECK-P9-VECTOR:       # %bb.0:
+; CHECK-P9-VECTOR-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P9-VECTOR-NEXT:    lfdx f1, 0, r4
+; CHECK-P9-VECTOR-NEXT:    xxspltd vs1, vs1, 0
+; CHECK-P9-VECTOR-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-VECTOR-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P9-VECTOR-NEXT:    blr
+;
+; CHECK-P9-LABEL: testi1:
+; CHECK-P9:       # %bb.0:
+; CHECK-P9-NEXT:    lfd f0, 0(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxpermdi vs0, f0, f0, 2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-P9-NEXT:    blr
   %v = load <2 x double>, <2 x double>* %p1
   %s = load double, double* %p2
   %r = insertelement <2 x double> %v, double %s, i32 1
   ret <2 x double> %r
 
-; CHECK-LABEL: testi1
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: lfdx 1, 0, 4
-; CHECK-DAG: xxspltd 1, 1, 0
-; CHECK-DAG: xxswapd 0, 0
-; CHECK: xxmrgld 34, 1, 0
-
-; CHECK-P9-LABEL: testi1
-; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
-; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3)
-; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
-; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]]
+
 }
 
 define double @teste0(<2 x double>* %p1) {
+; CHECK-LABEL: teste0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lxvd2x vs1, 0, r3
+; CHECK:         blr
+;
+; CHECK-P9-VECTOR-LABEL: teste0:
+; CHECK-P9-VECTOR:       # %bb.0:
+; CHECK-P9-VECTOR-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P9-VECTOR:         blr
+;
+; CHECK-P9-LABEL: teste0:
+; CHECK-P9:       # %bb.0:
+; CHECK-P9-NEXT:    lfd f1, 0(r3)
+; CHECK-P9-NEXT:    blr
   %v = load <2 x double>, <2 x double>* %p1
   %r = extractelement <2 x double> %v, i32 0
   ret double %r
 
-; CHECK-LABEL: teste0
-; CHECK: lxvd2x 1, 0, 3
 
-; CHECK-P9-LABEL: teste0
-; CHECK-P9: lfd 1, 0(3)
 }
 
 define double @teste1(<2 x double>* %p1) {
+; CHECK-LABEL: teste1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    xxswapd vs1, vs0
+; CHECK:         blr
+;
+; CHECK-P9-VECTOR-LABEL: teste1:
+; CHECK-P9-VECTOR:       # %bb.0:
+; CHECK-P9-VECTOR-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P9-VECTOR-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-VECTOR:         blr
+;
+; CHECK-P9-LABEL: teste1:
+; CHECK-P9:       # %bb.0:
+; CHECK-P9-NEXT:    lfd f1, 8(r3)
+; CHECK-P9-NEXT:    blr
   %v = load <2 x double>, <2 x double>* %p1
   %r = extractelement <2 x double> %v, i32 1
   ret double %r
 
-; CHECK-LABEL: teste1
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 1, 0
 
-; CHECK-P9-LABEL: teste1
-; CHECK-P9: lfd 1, 8(3)
 }




More information about the llvm-commits mailing list