[llvm] 4195ed9 - [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

Albion Fung via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 30 12:31:11 PDT 2021


Author: Albion Fung
Date: 2021-09-30T14:31:00-05:00
New Revision: 4195ed995993410b4a8ba2835fee71d4fe01c410

URL: https://github.com/llvm/llvm-project/commit/4195ed995993410b4a8ba2835fee71d4fe01c410
DIFF: https://github.com/llvm/llvm-project/commit/4195ed995993410b4a8ba2835fee71d4fe01c410.diff

LOG: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

This patch removes the unnecessary mf/mtvsr generated in conjunction
with xscvdpsxws/xscvdpuxws.

Differential revision: https://reviews.llvm.org/D109902

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/test-vector-insert.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0855b0186dab8..b00e58010b06b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2809,6 +2809,10 @@ def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
 def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
           (v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64),
                            (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>;
+def : Pat<(v4i32 (PPCSToV DblToInt.A)),
+          (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS f64:$A), sub_64))>;
+def : Pat<(v4i32 (PPCSToV DblToUInt.A)),
+          (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPUXWS f64:$A), sub_64))>;
 defm : ScalToVecWPermute<
   v4i32, FltToIntLoad.A,
   (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
@@ -4138,12 +4142,52 @@ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
           (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
           (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPSXWS f64:$B), sub_64),
+                            0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPUXWS f64:$B), sub_64),
+                            0))>;
 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
           (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPSXWS f64:$B), sub_64),
+                            4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPUXWS f64:$B), sub_64),
+                            4))>;
 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
           (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPSXWS f64:$B), sub_64),
+                            8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPUXWS f64:$B), sub_64),
+                            8))>;
 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
           (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPSXWS f64:$B), sub_64),
+                            12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPUXWS f64:$B), sub_64),
+                            12))>;
 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
           (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
@@ -4382,12 +4426,52 @@ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
           (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
           (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPSXWS f64:$B), sub_64),
+                            12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPUXWS f64:$B), sub_64),
+                            12))>;
 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
           (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPSXWS f64:$B), sub_64),
+                            8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPUXWS f64:$B), sub_64),
+                            8))>;
 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
           (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPSXWS f64:$B), sub_64),
+                            4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPUXWS f64:$B), sub_64),
+                            4))>;
 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
           (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPSXWS f64:$B), sub_64),
+                            0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
+          (v4i32 (XXINSERTW v4i32:$A,
+                            (SUBREG_TO_REG (i64 1),
+                                           (XSCVDPUXWS f64:$B), sub_64),
+                            0))>;
 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
           (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),

diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index edf49ccb53536..4c4d7be624cb8 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -17,8 +17,8 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN:  -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-BE-P9
-; xscvdpsxws and uxws is only available on Power7 and above
-; Codgen is different for LE Power7 and Power8
+; xscvdpsxws and xscvdpsxws is only available on Power7 and above
+; Codgen is different for Power7, Power8, and Power9.
 
 define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7-LABEL: test:
@@ -38,20 +38,16 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ;
 ; CHECK-LE-P8-LABEL: test:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-LE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT:    lvx v3, 0, r3
-; CHECK-LE-P8-NEXT:    mffprwz r4, f0
-; CHECK-LE-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    xscvdpsxws f0, f1
-; CHECK-LE-P9-NEXT:    mffprwz r3, f0
-; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    xxinsertw v2, vs0, 0
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -70,9 +66,7 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ;
 ; CHECK-BE-P8-LABEL: test:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-BE-P8-NEXT:    mffprwz r3, f0
-; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-BE-P8-NEXT:    vmrghw v3, v2, v3
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v3, v2, 3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
@@ -81,8 +75,6 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-BE-P9-LABEL: test:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    xscvdpsxws f0, f1
-; CHECK-BE-P9-NEXT:    mffprwz r3, f0
-; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    xxinsertw v2, vs0, 12
 ; CHECK-BE-P9-NEXT:    blr
 entry:
@@ -109,20 +101,16 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ;
 ; CHECK-LE-P8-LABEL: test2:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-LE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-LE-P8-NEXT:    lvx v3, 0, r3
-; CHECK-LE-P8-NEXT:    mffprwz r4, f0
-; CHECK-LE-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test2:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    xscvdpsxws f0, f1
-; CHECK-LE-P9-NEXT:    mffprwz r3, f0
-; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    xxinsertw v2, vs0, 0
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -141,9 +129,7 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ;
 ; CHECK-BE-P8-LABEL: test2:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-BE-P8-NEXT:    mffprwz r3, f0
-; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-BE-P8-NEXT:    vmrghw v3, v2, v3
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v3, v2, 3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
@@ -152,8 +138,6 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-BE-P9-LABEL: test2:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    xscvdpsxws f0, f1
-; CHECK-BE-P9-NEXT:    mffprwz r3, f0
-; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    xxinsertw v2, vs0, 12
 ; CHECK-BE-P9-NEXT:    blr
 entry:
@@ -180,20 +164,16 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ;
 ; CHECK-LE-P8-LABEL: test3:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    xscvdpuxws f0, f1
+; CHECK-LE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT:    lvx v3, 0, r3
-; CHECK-LE-P8-NEXT:    mffprwz r4, f0
-; CHECK-LE-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test3:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    xscvdpuxws f0, f1
-; CHECK-LE-P9-NEXT:    mffprwz r3, f0
-; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    xxinsertw v2, vs0, 0
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -212,9 +192,7 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ;
 ; CHECK-BE-P8-LABEL: test3:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpuxws f0, f1
-; CHECK-BE-P8-NEXT:    mffprwz r3, f0
-; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-BE-P8-NEXT:    vmrghw v3, v2, v3
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v3, v2, 3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
@@ -223,8 +201,6 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-BE-P9-LABEL: test3:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    xscvdpuxws f0, f1
-; CHECK-BE-P9-NEXT:    mffprwz r3, f0
-; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    xxinsertw v2, vs0, 12
 ; CHECK-BE-P9-NEXT:    blr
 entry:
@@ -251,20 +227,16 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ;
 ; CHECK-LE-P8-LABEL: test4:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    xscvdpuxws f0, f1
+; CHECK-LE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P8-NEXT:    lvx v3, 0, r3
-; CHECK-LE-P8-NEXT:    mffprwz r4, f0
-; CHECK-LE-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test4:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    xscvdpuxws f0, f1
-; CHECK-LE-P9-NEXT:    mffprwz r3, f0
-; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    xxinsertw v2, vs0, 0
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -283,9 +255,7 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ;
 ; CHECK-BE-P8-LABEL: test4:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpuxws f0, f1
-; CHECK-BE-P8-NEXT:    mffprwz r3, f0
-; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-BE-P8-NEXT:    vmrghw v3, v2, v3
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v3, v2, 3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
@@ -294,8 +264,6 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-BE-P9-LABEL: test4:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    xscvdpuxws f0, f1
-; CHECK-BE-P9-NEXT:    mffprwz r3, f0
-; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    xxinsertw v2, vs0, 12
 ; CHECK-BE-P9-NEXT:    blr
 entry:

diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
index caf483e45b602..242e9966827a2 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xscvdpuxws f1, v2
-; CHECK-P8-NEXT:    xscvdpuxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-P8-NEXT:    xscvdpuxws v2, v2
+; CHECK-P8-NEXT:    xscvdpuxws v3, f0
 ; CHECK-P8-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -26,26 +22,18 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xscvdpuxws f0, v2
-; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrwz v3, r3
-; CHECK-P9-NEXT:    xscvdpuxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-P9-NEXT:    xscvdpuxws v3, v2
+; CHECK-P9-NEXT:    xscvdpuxws v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xscvdpuxws f0, v2
-; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvdpuxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    xscvdpuxws v3, v2
+; CHECK-BE-NEXT:    xscvdpuxws v2, f0
 ; CHECK-BE-NEXT:    vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -305,12 +293,8 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xscvdpsxws f1, v2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-P8-NEXT:    xscvdpsxws v2, v2
+; CHECK-P8-NEXT:    xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -318,26 +302,18 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xscvdpsxws f0, v2
-; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrwz v3, r3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-P9-NEXT:    xscvdpsxws v3, v2
+; CHECK-P9-NEXT:    xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xscvdpsxws f0, v2
-; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    xscvdpsxws v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:    vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr


        


More information about the llvm-commits mailing list