[llvm] r352131 - [PowerPC] Exploit store instructions that store a single vector element

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 24 15:44:28 PST 2019


Author: nemanjai
Date: Thu Jan 24 15:44:28 2019
New Revision: 352131

URL: http://llvm.org/viewvc/llvm-project?rev=352131&view=rev
Log:
[PowerPC] Exploit store instructions that store a single vector element

This patch exploits the instructions that store a single element from a vector
to perform a (store (extract_elt)). We already have code that does this with
ISA 3.0 instructions that were added to handle i8/i16 types. However, we had
never exploited the existing ones that handle f32/f64/i32/i64 types.

Differential revision: https://reviews.llvm.org/D56175

Modified:
    llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll
    llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=352131&r1=352130&r2=352131&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Thu Jan 24 15:44:28 2019
@@ -3306,7 +3306,27 @@ let AddedComplexity = 400, Predicates =
       def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
                 (v2f64 (XXPERMDIs
                 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
-    }
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), ixaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                            ixaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+    } // IsLittleEndian, HasP9Vector
 
     let Predicates = [IsBigEndian, HasP9Vector] in {
       def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
@@ -3318,7 +3338,27 @@ let AddedComplexity = 400, Predicates =
                 (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
       def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
                 (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
-    }
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), ixaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), ixaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+    } // IsBigEndian, HasP9Vector
   }
 
   let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3833,8 +3873,38 @@ let AddedComplexity = 400 in {
     def : Pat<DWToSPExtractConv.BVS,
               (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
                               (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+    def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+    // Elements in a register on a BE system are in order <0, 1, 2, 3>.
+    // The store instructions store the second word from the left.
+    // So to align element zero, we need to modulo-left-shift by 3 words.
+    // Similar logic applies for elements 2 and 3.
+    foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+      def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+      def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+    }
   }
 
+  let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
+    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+   }
+
   // Big endian, available on all targets with VSX
   let Predicates = [IsBigEndian, HasVSX] in {
     def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3870,8 +3940,38 @@ let AddedComplexity = 400 in {
     def : Pat<DWToSPExtractConv.BVS,
               (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
                               (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+    def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+    // Elements in a register on a LE system are in order <3, 2, 1, 0>.
+    // The store instructions store the second word from the left.
+    // So to align element 3, we need to modulo-left-shift by 3 words.
+    // Similar logic applies for elements 0 and 1.
+    foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+      def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+      def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+    }
   }
 
+  let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
+    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+   }
+
   let Predicates = [IsLittleEndian, HasVSX] in {
   // Little endian, available on all targets with VSX
     def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),

Modified: llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll?rev=352131&r1=352130&r2=352131&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll Thu Jan 24 15:44:28 2019
@@ -7,26 +7,66 @@
 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unkknown-unknown \
 ; RUN:   -ppc-asm-full-reg-names -verify-machineinstrs -O2 < %s | FileCheck %s \
 ; RUN:   --check-prefix=CHECK-P9
-; Function Attrs: norecurse nounwind writeonly
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unkknown-unknown \
+; RUN:   -ppc-asm-full-reg-names -verify-machineinstrs -O2 < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-P9-BE
+
+define <2 x i64> @testllv(<2 x i64> returned %a, <2 x i64> %b, i64* nocapture %ap, i64 %Idx) local_unnamed_addr #0 {
+; CHECK-LABEL: testllv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, vs34
+; CHECK-NEXT:    sldi r3, r8, 3
+; CHECK-NEXT:    stfdx f0, r7, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testllv:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r8, 3
+; CHECK-BE-NEXT:    stxsdx vs34, r7, r3
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: testllv:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, vs34
+; CHECK-P9-NEXT:    sldi r3, r8, 3
+; CHECK-P9-NEXT:    stfdx f0, r7, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testllv:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    sldi r3, r8, 3
+; CHECK-P9-BE-NEXT:    stxsdx vs34, r7, r3
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <2 x i64> %a, i32 0
+  %arrayidx = getelementptr inbounds i64, i64* %ap, i64 %Idx
+  store i64 %vecext, i64* %arrayidx, align 8
+  ret <2 x i64> %a
+}
+
 define <2 x i64> @testll0(<2 x i64> returned %a, <2 x i64> %b, i64* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testll0:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    mfvsrd r3, f0
-; CHECK-NEXT:    std r3, 24(r7)
+; CHECK-NEXT:    stfd f0, 24(r7)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testll0:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mfvsrd r3, vs34
-; CHECK-BE-NEXT:    std r3, 24(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 24
+; CHECK-BE-NEXT:    stxsdx vs34, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testll0:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mfvsrld r3, vs34
-; CHECK-P9-NEXT:    std r3, 24(r7)
+; CHECK-P9-NEXT:    xxswapd vs0, vs34
+; CHECK-P9-NEXT:    stfd f0, 24(r7)
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testll0:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    stxsd v2, 24(r7)
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %a, i32 0
   %arrayidx = getelementptr inbounds i64, i64* %ap, i64 3
@@ -38,22 +78,26 @@ entry:
 define <2 x i64> @testll1(<2 x i64> returned %a, i64 %b, i64* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testll1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mfvsrd r3, vs34
-; CHECK-NEXT:    std r3, 24(r6)
+; CHECK-NEXT:    addi r3, r6, 24
+; CHECK-NEXT:    stxsdx vs34, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testll1:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxswapd vs0, vs34
-; CHECK-BE-NEXT:    mfvsrd r3, f0
-; CHECK-BE-NEXT:    std r3, 24(r6)
+; CHECK-BE-NEXT:    stfd f0, 24(r6)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testll1:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mfvsrd r3, vs34
-; CHECK-P9-NEXT:    std r3, 24(r6)
+; CHECK-P9-NEXT:    stxsd v2, 24(r6)
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testll1:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxswapd vs0, vs34
+; CHECK-P9-BE-NEXT:    stfd f0, 24(r6)
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %a, i32 1
   %arrayidx = getelementptr inbounds i64, i64* %ap, i64 3
@@ -61,7 +105,39 @@ entry:
   ret <2 x i64> %a
 }
 
-; Function Attrs: norecurse nounwind writeonly
+define <2 x double> @testdv(<2 x double> returned %a, <2 x double> %b, double* nocapture %ap, i64 %Idx) local_unnamed_addr #0 {
+; CHECK-LABEL: testdv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, vs34
+; CHECK-NEXT:    sldi r3, r8, 3
+; CHECK-NEXT:    stfdx f0, r7, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testdv:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r8, 3
+; CHECK-BE-NEXT:    stxsdx vs34, r7, r3
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: testdv:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, vs34
+; CHECK-P9-NEXT:    sldi r3, r8, 3
+; CHECK-P9-NEXT:    stfdx f0, r7, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testdv:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    sldi r3, r8, 3
+; CHECK-P9-BE-NEXT:    stxsdx vs34, r7, r3
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <2 x double> %a, i32 0
+  %arrayidx = getelementptr inbounds double, double* %ap, i64 %Idx
+  store double %vecext, double* %arrayidx, align 8
+  ret <2 x double> %a
+}
+
 define <2 x double> @testd0(<2 x double> returned %a, <2 x double> %b, double* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testd0:
 ; CHECK:       # %bb.0: # %entry
@@ -80,6 +156,11 @@ define <2 x double> @testd0(<2 x double>
 ; CHECK-P9-NEXT:    xxswapd vs0, vs34
 ; CHECK-P9-NEXT:    stfd f0, 24(r7)
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testd0:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    stxsd v2, 24(r7)
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %a, i32 0
   %arrayidx = getelementptr inbounds double, double* %ap, i64 3
@@ -105,6 +186,12 @@ define <2 x double> @testd1(<2 x double>
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    stxsd v2, 24(r7)
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testd1:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxswapd vs0, vs34
+; CHECK-P9-BE-NEXT:    stfd f0, 24(r7)
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %a, i32 1
   %arrayidx = getelementptr inbounds double, double* %ap, i64 3
@@ -116,23 +203,31 @@ entry:
 define <4 x float> @testf0(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testf0:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-NEXT:    xscvspdpn f0, vs0
-; CHECK-NEXT:    stfs f0, 12(r7)
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testf0:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xscvspdpn f0, vs34
-; CHECK-BE-NEXT:    stfs f0, 12(r7)
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testf0:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    stfs f0, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testf0:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-BE-NEXT:    addi r3, r7, 12
+; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 0
   %arrayidx = getelementptr inbounds float, float* %ap, i64 3
@@ -144,24 +239,29 @@ entry:
 define <4 x float> @testf1(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testf1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    xscvspdpn f0, vs0
-; CHECK-NEXT:    stfs f0, 12(r7)
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testf1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    stfs f0, 12(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testf1:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxswapd vs0, vs34
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    stfs f0, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testf1:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    addi r3, r7, 12
+; CHECK-P9-BE-NEXT:    stxsiwx vs34, 0, r3
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 1
   %arrayidx = getelementptr inbounds float, float* %ap, i64 3
@@ -173,24 +273,29 @@ entry:
 define <4 x float> @testf2(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testf2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-NEXT:    xscvspdpn f0, vs0
-; CHECK-NEXT:    stfs f0, 12(r7)
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testf2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxswapd vs0, vs34
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    stfs f0, 12(r7)
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testf2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    stfs f0, 12(r7)
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testf2:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-BE-NEXT:    addi r3, r7, 12
+; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 2
   %arrayidx = getelementptr inbounds float, float* %ap, i64 3
@@ -202,22 +307,31 @@ entry:
 define <4 x float> @testf3(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testf3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscvspdpn f0, vs34
-; CHECK-NEXT:    stfs f0, 12(r7)
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testf3:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    stfs f0, 12(r7)
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testf3:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xscvspdpn f0, vs34
-; CHECK-P9-NEXT:    stfs f0, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testf3:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-BE-NEXT:    addi r3, r7, 12
+; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 3
   %arrayidx = getelementptr inbounds float, float* %ap, i64 3
@@ -229,24 +343,31 @@ entry:
 define <4 x i32> @testi0(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testi0:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    mfvsrwz r3, f0
-; CHECK-NEXT:    stw r3, 12(r7)
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testi0:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    stw r3, 12(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testi0:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
-; CHECK-P9-NEXT:    stw r3, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testi0:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-BE-NEXT:    addi r3, r7, 12
+; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 0
   %arrayidx = getelementptr inbounds i32, i32* %ap, i64 3
@@ -259,22 +380,28 @@ define <4 x i32> @testi1(<4 x i32> retur
 ; CHECK-LABEL: testi1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-NEXT:    mfvsrwz r3, f0
-; CHECK-NEXT:    stw r3, 12(r7)
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testi1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mfvsrwz r3, vs34
-; CHECK-BE-NEXT:    stw r3, 12(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testi1:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r3, 4
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
-; CHECK-P9-NEXT:    stw r3, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testi1:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    addi r3, r7, 12
+; CHECK-P9-BE-NEXT:    stxsiwx vs34, 0, r3
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 1
   %arrayidx = getelementptr inbounds i32, i32* %ap, i64 3
@@ -286,22 +413,29 @@ entry:
 define <4 x i32> @testi2(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testi2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mfvsrwz r3, vs34
-; CHECK-NEXT:    stw r3, 12(r7)
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testi2:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    stw r3, 12(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testi2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mfvsrwz r3, vs34
-; CHECK-P9-NEXT:    stw r3, 12(r7)
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testi2:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-BE-NEXT:    addi r3, r7, 12
+; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 2
   %arrayidx = getelementptr inbounds i32, i32* %ap, i64 3
@@ -314,23 +448,30 @@ define <4 x i32> @testi3(<4 x i32> retur
 ; CHECK-LABEL: testi3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-NEXT:    mfvsrwz r3, f0
-; CHECK-NEXT:    stw r3, 12(r7)
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testi3:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxswapd vs0, vs34
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    stw r3, 12(r7)
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testi3:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r3, 12
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
-; CHECK-P9-NEXT:    stw r3, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: testi3:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-BE-NEXT:    addi r3, r7, 12
+; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 3
   %arrayidx = getelementptr inbounds i32, i32* %ap, i64 3

Modified: llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll?rev=352131&r1=352130&r2=352131&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll Thu Jan 24 15:44:28 2019
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
 ; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
@@ -15,11 +16,10 @@ define void @test_liwzx1(<1 x float>* %A
 ; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P9LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P9LE-NEXT:    xvaddsp vs0, vs0, vs1
-; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; P9LE-NEXT:    xscvspdpn f0, vs0
-; P9LE-NEXT:    stfs f0, 0(r5)
+; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 2
+; P9LE-NEXT:    stfiwx f0, 0, r5
 ; P9LE-NEXT:    blr
-
+;
 ; P9BE-LABEL: test_liwzx1:
 ; P9BE:       # %bb.0:
 ; P9BE-NEXT:    lfiwzx f0, 0, r3
@@ -27,10 +27,10 @@ define void @test_liwzx1(<1 x float>* %A
 ; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P9BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P9BE-NEXT:    xvaddsp vs0, vs0, vs1
-; P9BE-NEXT:    xscvspdpn f0, vs0
-; P9BE-NEXT:    stfs f0, 0(r5)
+; P9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P9BE-NEXT:    stfiwx f0, 0, r5
 ; P9BE-NEXT:    blr
-
+;
 ; P8LE-LABEL: test_liwzx1:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    lfiwzx f0, 0, r3
@@ -38,11 +38,10 @@ define void @test_liwzx1(<1 x float>* %A
 ; P8LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P8LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P8LE-NEXT:    xvaddsp vs0, vs0, vs1
-; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; P8LE-NEXT:    xscvspdpn f0, vs0
-; P8LE-NEXT:    stfsx f0, 0, r5
+; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 2
+; P8LE-NEXT:    stfiwx f0, 0, r5
 ; P8LE-NEXT:    blr
-
+;
 ; P8BE-LABEL: test_liwzx1:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    lfiwzx f0, 0, r3
@@ -50,9 +49,12 @@ define void @test_liwzx1(<1 x float>* %A
 ; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P8BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P8BE-NEXT:    xvaddsp vs0, vs0, vs1
-; P8BE-NEXT:    xscvspdpn f0, vs0
-; P8BE-NEXT:    stfsx f0, 0, r5
+; P8BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P8BE-NEXT:    stfiwx f0, 0, r5
 ; P8BE-NEXT:    blr
+
+
+
   %a = load <1 x float>, <1 x float>* %A
   %b = load <1 x float>, <1 x float>* %B
   %X = fadd <1 x float> %a, %b
@@ -68,12 +70,11 @@ define <1 x float>* @test_liwzx2(<1 x fl
 ; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P9LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P9LE-NEXT:    xvsubsp vs0, vs0, vs1
-; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; P9LE-NEXT:    xscvspdpn f0, vs0
+; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 2
 ; P9LE-NEXT:    mr r3, r5
-; P9LE-NEXT:    stfs f0, 0(r5)
+; P9LE-NEXT:    stfiwx f0, 0, r5
 ; P9LE-NEXT:    blr
-
+;
 ; P9BE-LABEL: test_liwzx2:
 ; P9BE:       # %bb.0:
 ; P9BE-NEXT:    lfiwzx f0, 0, r3
@@ -81,11 +82,11 @@ define <1 x float>* @test_liwzx2(<1 x fl
 ; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P9BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P9BE-NEXT:    xvsubsp vs0, vs0, vs1
-; P9BE-NEXT:    xscvspdpn f0, vs0
+; P9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; P9BE-NEXT:    mr r3, r5
-; P9BE-NEXT:    stfs f0, 0(r5)
+; P9BE-NEXT:    stfiwx f0, 0, r5
 ; P9BE-NEXT:    blr
-
+;
 ; P8LE-LABEL: test_liwzx2:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    lfiwzx f0, 0, r3
@@ -94,11 +95,10 @@ define <1 x float>* @test_liwzx2(<1 x fl
 ; P8LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P8LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P8LE-NEXT:    xvsubsp vs0, vs0, vs1
-; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; P8LE-NEXT:    xscvspdpn f0, vs0
-; P8LE-NEXT:    stfsx f0, 0, r5
+; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 2
+; P8LE-NEXT:    stfiwx f0, 0, r5
 ; P8LE-NEXT:    blr
-
+;
 ; P8BE-LABEL: test_liwzx2:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    lfiwzx f0, 0, r3
@@ -107,9 +107,14 @@ define <1 x float>* @test_liwzx2(<1 x fl
 ; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P8BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P8BE-NEXT:    xvsubsp vs0, vs0, vs1
-; P8BE-NEXT:    xscvspdpn f0, vs0
-; P8BE-NEXT:    stfsx f0, 0, r5
+; P8BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P8BE-NEXT:    stfiwx f0, 0, r5
 ; P8BE-NEXT:    blr
+
+
+
+
+
   %a = load <1 x float>, <1 x float>* %A
   %b = load <1 x float>, <1 x float>* %B
   %X = fsub <1 x float> %a, %b




More information about the llvm-commits mailing list