[llvm] db26cd3 - [PowerPC] Improve f32 to i32 bitcast code gen

Mon May 31 14:01:11 PDT 2021

Author: Albion Fung
Date: 2021-05-31T16:00:58-05:00
New Revision: db26cd30b6dd65e88d786e97a1e453af5cd48966

URL: https://github.com/llvm/llvm-project/commit/db26cd30b6dd65e88d786e97a1e453af5cd48966
DIFF: https://github.com/llvm/llvm-project/commit/db26cd30b6dd65e88d786e97a1e453af5cd48966.diff

LOG: [PowerPC] Improve f32 to i32 bitcast code gen

The code gen for f32 to i32 bitcast is not currently the most efficient;
this patch removes some unneccessary instructions gerneated.

Differential revision: https://reviews.llvm.org/D100782

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
    llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
    llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
    llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
    llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
    llvm/test/CodeGen/PowerPC/vec_insert_elt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 09e1e23ca798d..3198424507683 100644

--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1816,8 +1816,7 @@ let PPC970_Single = 1, AddedComplexity = 400 in {
 
 // Output dag used to bitcast f32 to i32 and f64 to i64
 def Bitcast {
-  dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI (XSCVDPSPN $A),
-                      (XSCVDPSPN $A), 3), sub_64)));
+  dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XSCVDPSPN $A), sub_64)));
   dag DblToLong = (i64 (MFVSRD $A));
 }
 
@@ -2212,7 +2211,7 @@ def VectorExtractions {
 }
 
 def AlignValues {
-  dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
+  dag F32_TO_BE_WORD1 = (v4f32 (XSCVDPSPN $B));
   dag I32_TO_BE_WORD1 = (SUBREG_TO_REG (i64 1), (MTVSRWZ $B), sub_64);
 }
 
@@ -2817,6 +2816,10 @@ defm : ScalToVecWPermute<
   v4i32, FltToUIntLoad.A,
   (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
   (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64)>;
+def : Pat<(v4f32 (build_vector (f32 (fpround f64:$A)), (f32 (fpround f64:$A)),
+                               (f32 (fpround f64:$A)), (f32 (fpround f64:$A)))),
+          (v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$A), sub_64), 0))>;
+
 def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
           (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
 def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)),
@@ -4135,6 +4138,19 @@ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
           (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
 
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)),
+          (v4f32 (XXINSERTW v4f32:$A,
+                  (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)),
+          (v4f32 (XXINSERTW v4f32:$A,
+                  (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)),
+          (v4f32 (XXINSERTW v4f32:$A,
+                  (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)),
+          (v4f32 (XXINSERTW v4f32:$A,
+                  (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>;
+
 // Scalar stores of i8
 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), ForceXForm:$dst),
           (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), ForceXForm:$dst)>;
@@ -4366,6 +4382,19 @@ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
           (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
 
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)),
+          (v4f32 (XXINSERTW v4f32:$A,
+                  (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)),
+          (v4f32 (XXINSERTW v4f32:$A,
+                  (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)),
+          (v4f32 (XXINSERTW v4f32:$A,
+                  (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)),
+          (v4f32 (XXINSERTW v4f32:$A,
+                  (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>;
+
 def : Pat<(v8i16 (PPCld_vec_be ForceXForm:$src)),
           (COPY_TO_REGCLASS (LXVH8X ForceXForm:$src), VRRC)>;
 def : Pat<(PPCst_vec_be v8i16:$rS, ForceXForm:$dst),

diff  --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
index 31c583c9b2c63..76d76ecae7312 100644
--- a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
@@ -743,14 +743,12 @@ define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
 ; CHECK-64-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    xscvdpspn 0, 1
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_:
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    xscvdpspn 0, 1
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-32-NEXT:    blr
 entry:
@@ -762,14 +760,12 @@ define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
 ; CHECK-64-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    xscvdpspn 0, 1
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 4
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_:
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    xscvdpspn 0, 1
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 4
 ; CHECK-32-NEXT:    blr
 entry:
@@ -781,14 +777,12 @@ define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
 ; CHECK-64-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    xscvdpspn 0, 1
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_:
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    xscvdpspn 0, 1
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-32-NEXT:    blr
 entry:
@@ -800,14 +794,12 @@ define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
 ; CHECK-64-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    xscvdpspn 0, 1
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 12
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_:
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    xscvdpspn 0, 1
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 12
 ; CHECK-32-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
index 26dd16121feca..abcd8ff39ddab 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -262,8 +262,8 @@ entry:
 define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
 ; CHECK-64-LABEL: testFloat1:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    rlwinm 3, 4, 2, 28, 29
-; CHECK-64-NEXT:    addi 4, 1, -16
+; CHECK-64-DAG:     rlwinm 3, 4, 2, 28, 29
+; CHECK-64-DAG:     addi 4, 1, -16
 ; CHECK-64-NEXT:    stxv 34, -16(1)
 ; CHECK-64-NEXT:    stfsx 1, 4, 3
 ; CHECK-64-NEXT:    lxv 34, -16(1)
@@ -281,17 +281,15 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
 ; CHECK-64-P10-LABEL: testFloat1:
 ; CHECK-64-P10:       # %bb.0: # %entry
 ; CHECK-64-P10-NEXT:    xscvdpspn 0, 1
-; CHECK-64-P10-NEXT:    extsw 3, 4
-; CHECK-64-P10-NEXT:    slwi 3, 3, 2
-; CHECK-64-P10-NEXT:    xxsldwi 0, 0, 0, 3
-; CHECK-64-P10-NEXT:    mffprwz 4, 0
-; CHECK-64-P10-NEXT:    vinswlx 2, 3, 4
+; CHECK-64-P10-NEXT:    extsw 4, 4
+; CHECK-64-P10-NEXT:    slwi 4, 4, 2
+; CHECK-64-P10-NEXT:    mffprwz 3, 0
+; CHECK-64-P10-NEXT:    vinswlx 2, 4, 3
 ; CHECK-64-P10-NEXT:    blr
 ;
 ; CHECK-32-P10-LABEL: testFloat1:
 ; CHECK-32-P10:       # %bb.0: # %entry
 ; CHECK-32-P10-NEXT:    xscvdpspn 0, 1
-; CHECK-32-P10-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-P10-NEXT:    mffprwz 3, 0
 ; CHECK-32-P10-NEXT:    vinswlx 2, 4, 3
 ; CHECK-32-P10-NEXT:    blr
@@ -304,8 +302,8 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
 ; CHECK-64-LABEL: testFloat2:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    lwz 6, 0(3)
-; CHECK-64-NEXT:    rlwinm 4, 4, 2, 28, 29
-; CHECK-64-NEXT:    addi 7, 1, -32
+; CHECK-64-DAG:     rlwinm 4, 4, 2, 28, 29
+; CHECK-64-DAG:     addi 7, 1, -32
 ; CHECK-64-NEXT:    stxv 34, -32(1)
 ; CHECK-64-NEXT:    stwx 6, 7, 4
 ; CHECK-64-NEXT:    rlwinm 4, 5, 2, 28, 29
@@ -367,8 +365,8 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
 ; CHECK-64-LABEL: testFloat3:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    lis 6, 1
-; CHECK-64-NEXT:    rlwinm 4, 4, 2, 28, 29
-; CHECK-64-NEXT:    addi 7, 1, -32
+; CHECK-64-DAG:         rlwinm 4, 4, 2, 28, 29
+; CHECK-64-DAG:    addi 7, 1, -32
 ; CHECK-64-NEXT:    lwzx 6, 3, 6
 ; CHECK-64-NEXT:    stxv 34, -32(1)
 ; CHECK-64-NEXT:    stwx 6, 7, 4
@@ -440,7 +438,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
 ; CHECK-64-LABEL: testFloatImm1:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    xscvdpspn 0, 1
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-64-NEXT:    blr
@@ -448,7 +445,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
 ; CHECK-32-LABEL: testFloatImm1:
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    xscvdpspn 0, 1
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-32-NEXT:    blr
@@ -456,7 +452,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
 ; CHECK-64-P10-LABEL: testFloatImm1:
 ; CHECK-64-P10:       # %bb.0: # %entry
 ; CHECK-64-P10-NEXT:    xscvdpspn 0, 1
-; CHECK-64-P10-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-P10-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-64-P10-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-64-P10-NEXT:    blr
@@ -464,7 +459,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
 ; CHECK-32-P10-LABEL: testFloatImm1:
 ; CHECK-32-P10:       # %bb.0: # %entry
 ; CHECK-32-P10-NEXT:    xscvdpspn 0, 1
-; CHECK-32-P10-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-P10-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-32-P10-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-32-P10-NEXT:    blr
@@ -479,11 +473,9 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    lfs 0, 0(3)
 ; CHECK-64-NEXT:    xscvdpspn 0, 0
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-64-NEXT:    lfs 0, 4(3)
 ; CHECK-64-NEXT:    xscvdpspn 0, 0
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-64-NEXT:    blr
 ;
@@ -491,11 +483,9 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    lfs 0, 0(3)
 ; CHECK-32-NEXT:    xscvdpspn 0, 0
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-32-NEXT:    lfs 0, 4(3)
 ; CHECK-32-NEXT:    xscvdpspn 0, 0
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-32-NEXT:    blr
 ;
@@ -533,11 +523,9 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
 ; CHECK-64-NEXT:    li 4, 1
 ; CHECK-64-NEXT:    rldic 4, 4, 38, 25
 ; CHECK-64-NEXT:    xscvdpspn 0, 0
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-64-NEXT:    lfsx 0, 3, 4
 ; CHECK-64-NEXT:    xscvdpspn 0, 0
-; CHECK-64-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-64-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-64-NEXT:    blr
 ;
@@ -546,11 +534,9 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
 ; CHECK-32-NEXT:    lis 4, 4
 ; CHECK-32-NEXT:    lfsx 0, 3, 4
 ; CHECK-32-NEXT:    xscvdpspn 0, 0
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 0
 ; CHECK-32-NEXT:    lfs 0, 0(3)
 ; CHECK-32-NEXT:    xscvdpspn 0, 0
-; CHECK-32-NEXT:    xxsldwi 0, 0, 0, 3
 ; CHECK-32-NEXT:    xxinsertw 34, 0, 8
 ; CHECK-32-NEXT:    blr
 ;
@@ -589,7 +575,7 @@ entry:
 define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
 ; CHECK-64-LABEL: testDouble1:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    rlwinm 3, 4, 3, 28, 28
+; CHECK-64:         rlwinm 3, 4, 3, 28, 28
 ; CHECK-64-NEXT:    addi 4, 1, -16
 ; CHECK-64-NEXT:    stxv 34, -16(1)
 ; CHECK-64-NEXT:    stfdx 1, 4, 3
@@ -615,8 +601,8 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
 ;
 ; CHECK-32-P10-LABEL: testDouble1:
 ; CHECK-32-P10:       # %bb.0: # %entry
-; CHECK-32-P10-NEXT:    addi 4, 1, -16
-; CHECK-32-P10-NEXT:    rlwinm 3, 5, 3, 28, 28
+; CHECK-32-P10-DAG:     addi 4, 1, -16
+; CHECK-32-P10-DAG:     rlwinm 3, 5, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stxv 34, -16(1)
 ; CHECK-32-P10-NEXT:    stfdx 1, 4, 3
 ; CHECK-32-P10-NEXT:    lxv 34, -16(1)
@@ -630,8 +616,8 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
 ; CHECK-64-LABEL: testDouble2:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    ld 6, 0(3)
-; CHECK-64-NEXT:    rlwinm 4, 4, 3, 28, 28
-; CHECK-64-NEXT:    addi 7, 1, -32
+; CHECK-64-DAG:         rlwinm 4, 4, 3, 28, 28
+; CHECK-64-DAG:    addi 7, 1, -32
 ; CHECK-64-NEXT:    stxv 34, -32(1)
 ; CHECK-64-NEXT:    stdx 6, 7, 4
 ; CHECK-64-NEXT:    li 4, 1
@@ -675,8 +661,8 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
 ; CHECK-32-P10-LABEL: testDouble2:
 ; CHECK-32-P10:       # %bb.0: # %entry
 ; CHECK-32-P10-NEXT:    lfd 0, 0(3)
-; CHECK-32-P10-NEXT:    addi 6, 1, -32
-; CHECK-32-P10-NEXT:    rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-DAG:     addi 6, 1, -32
+; CHECK-32-P10-DAG:     rlwinm 4, 4, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stxv 34, -32(1)
 ; CHECK-32-P10-NEXT:    rlwinm 5, 5, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stfdx 0, 6, 4
@@ -702,8 +688,8 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
 ; CHECK-64-LABEL: testDouble3:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    lis 6, 1
-; CHECK-64-NEXT:    rlwinm 4, 4, 3, 28, 28
-; CHECK-64-NEXT:    addi 7, 1, -32
+; CHECK-64-DAG:     rlwinm 4, 4, 3, 28, 28
+; CHECK-64-DAG:     addi 7, 1, -32
 ; CHECK-64-NEXT:    ldx 6, 3, 6
 ; CHECK-64-NEXT:    stxv 34, -32(1)
 ; CHECK-64-NEXT:    stdx 6, 7, 4

diff  --git a/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll b/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
index 26a0be8da4070..00a9ee052bc89 100644
--- a/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
+++ b/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
@@ -10,8 +10,8 @@ entry:
 ; CHECK-P7: stfs 1,
 ; CHECK-P7: lwa 3,
 ; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1
-; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3
-; CHECK: mffprwz 3, [[SHIFTREG]]
+; CHECK-NOT: xxsldwi
+; CHECK: mffprwz 3, [[CONVREG]]
 }
 
 define i64 @f64toi64(double %a) {
@@ -50,8 +50,8 @@ entry:
 ; CHECK-P7: stfs 1,
 ; CHECK-P7: lwz 3,
 ; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1
-; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3
-; CHECK: mffprwz 3, [[SHIFTREG]]
+; CHECK-NOT: xxsldwi
+; CHECK: mffprwz 3, [[CONVREG]]
 }
 
 define i64 @f64toi64u(double %a) {

diff  --git a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
index 467aa422ec93a..efec3c170f8b0 100644
--- a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
@@ -506,11 +506,9 @@ define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
 entry:
 ; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
 ; CHECK: xscvdpspn 0, 1
-; CHECK: xxsldwi 0, 0, 0, 3
 ; CHECK: xxinsertw 34, 0, 12
 ; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
 ; CHECK-BE: xscvdpspn 0, 1
-; CHECK-BE: xxsldwi 0, 0, 0, 3
 ; CHECK-BE: xxinsertw 34, 0, 0
   %vecins = insertelement <4 x float> %a, float %b, i32 0
   ret <4 x float> %vecins
@@ -520,11 +518,9 @@ define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
 entry:
 ; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
 ; CHECK: xscvdpspn 0, 1
-; CHECK: xxsldwi 0, 0, 0, 3
 ; CHECK: xxinsertw 34, 0, 8
 ; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
 ; CHECK-BE: xscvdpspn 0, 1
-; CHECK-BE: xxsldwi 0, 0, 0, 3
 ; CHECK-BE: xxinsertw 34, 0, 4
   %vecins = insertelement <4 x float> %a, float %b, i32 1
   ret <4 x float> %vecins
@@ -534,11 +530,9 @@ define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
 entry:
 ; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
 ; CHECK: xscvdpspn 0, 1
-; CHECK: xxsldwi 0, 0, 0, 3
 ; CHECK: xxinsertw 34, 0, 4
 ; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
 ; CHECK-BE: xscvdpspn 0, 1
-; CHECK-BE: xxsldwi 0, 0, 0, 3
 ; CHECK-BE: xxinsertw 34, 0, 8
   %vecins = insertelement <4 x float> %a, float %b, i32 2
   ret <4 x float> %vecins
@@ -548,11 +542,9 @@ define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
 entry:
 ; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
 ; CHECK: xscvdpspn 0, 1
-; CHECK: xxsldwi 0, 0, 0, 3
 ; CHECK: xxinsertw 34, 0, 0
 ; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
 ; CHECK-BE: xscvdpspn 0, 1
-; CHECK-BE: xxsldwi 0, 0, 0, 3
 ; CHECK-BE: xxinsertw 34, 0, 12
   %vecins = insertelement <4 x float> %a, float %b, i32 3
   ret <4 x float> %vecins

diff  --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index 39d722adf5090..9a893bed82aac 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -216,7 +216,6 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    lfs f0, 0(r3)
 ; P9LE-NEXT:    xscvdpspn vs0, f0
-; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; P9LE-NEXT:    xxinsertw v2, vs0, 12
 ; P9LE-NEXT:    blr
 ;
@@ -224,7 +223,6 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    lfs f0, 0(r3)
 ; P9BE-NEXT:    xscvdpspn vs0, f0
-; P9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; P9BE-NEXT:    xxinsertw v2, vs0, 0
 ; P9BE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
index b6d6edfb7c52c..92536e0686228 100644
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -200,21 +200,19 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
 ; CHECK-LABEL: testFloat1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xscvdpspn vs0, f1
-; CHECK-NEXT:    extsw r3, r6
-; CHECK-NEXT:    slwi r3, r3, 2
-; CHECK-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; CHECK-NEXT:    mffprwz r4, f0
-; CHECK-NEXT:    vinswrx v2, r3, r4
+; CHECK-NEXT:    extsw r4, r6
+; CHECK-NEXT:    slwi r4, r4, 2
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    vinswrx v2, r4, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testFloat1:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xscvdpspn vs0, f1
-; CHECK-BE-NEXT:    extsw r3, r6
-; CHECK-BE-NEXT:    slwi r3, r3, 2
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    vinswlx v2, r3, r4
+; CHECK-BE-NEXT:    extsw r4, r6
+; CHECK-BE-NEXT:    slwi r4, r4, 2
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    vinswlx v2, r4, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testFloat1:
@@ -346,7 +344,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
 ; CHECK-LABEL: testFloatImm1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xscvdpspn vs0, f1
-; CHECK-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; CHECK-NEXT:    xxinsertw v2, vs0, 12
 ; CHECK-NEXT:    xxinsertw v2, vs0, 4
 ; CHECK-NEXT:    blr
@@ -354,7 +351,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
 ; CHECK-BE-LABEL: testFloatImm1:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xscvdpspn vs0, f1
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xxinsertw v2, vs0, 0
 ; CHECK-BE-NEXT:    xxinsertw v2, vs0, 8
 ; CHECK-BE-NEXT:    blr
@@ -362,7 +358,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
 ; CHECK-P9-LABEL: testFloatImm1:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f1
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
 ; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
 ; CHECK-P9-NEXT:    blr
@@ -393,11 +388,9 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    lfs f0, 0(r5)
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
 ; CHECK-P9-NEXT:    lfs f0, 4(r5)
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
 ; CHECK-P9-NEXT:    blr
 entry:
@@ -439,11 +432,9 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
 ; CHECK-P9-NEXT:    li r3, 1
 ; CHECK-P9-NEXT:    rldic r3, r3, 38, 25
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
 ; CHECK-P9-NEXT:    lfsx f0, r5, r3
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
 ; CHECK-P9-NEXT:    blr
 entry:
@@ -738,3 +729,26 @@ entry:
   ret <2 x double> %vecins
 }
 
+define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testInsertDoubleToFloat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdpsp f0, f1
+; CHECK-NEXT:    xxinsertw v2, vs0, 8
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testInsertDoubleToFloat:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xscvdpsp f0, f1
+; CHECK-BE-NEXT:    xxinsertw v2, vs0, 4
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: testInsertDoubleToFloat:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpsp f0, f1
+; CHECK-P9-NEXT:    xxinsertw v2, vs0, 4
+; CHECK-P9-NEXT:    blr
+entry:
+  %conv = fptrunc double %b to float
+  %vecins = insertelement <4 x float> %a, float %conv, i32 1
+  ret <4 x float> %vecins
+}