[llvm] af43094 - [PowerPC][AIX] Allow VSX patterns to be 32-bit and 64-bit safe on P8+.

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Fri May 27 08:34:34 PDT 2022


Author: Amy Kwan
Date: 2022-05-27T10:34:17-05:00
New Revision: af430944b3ba8ca55c4fd6b73f53c198c469ffee

URL: https://github.com/llvm/llvm-project/commit/af430944b3ba8ca55c4fd6b73f53c198c469ffee
DIFF: https://github.com/llvm/llvm-project/commit/af430944b3ba8ca55c4fd6b73f53c198c469ffee.diff

LOG: [PowerPC][AIX] Allow VSX patterns to be 32-bit and 64-bit safe on P8+.

This patch updates two patterns involving `scalar_to_vector` and
`SCALAR_TO_VECTOR_PERMUTED` nodes to be safe for both 64-bit and 32-bit targets
by moving them out of the 64-bit-specific guard and into the shared big-endian
`HasVSX, HasP8Vector` block. These patterns are matched on POWER8 and above.

Differential Revision: https://reviews.llvm.org/D125389

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/float-vector-gather.ll
    llvm/test/CodeGen/PowerPC/load-and-splat.ll
    llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
    llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
    llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
    llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
    llvm/test/CodeGen/PowerPC/vec_insert_elt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 1e87af516e52b..6e562498dcf90 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3367,6 +3367,15 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
 
 def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
           (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
+
+defm : ScalToVecWPermute<
+  v4i32, (i32 (load ForceXForm:$src)),
+  (XXSLDWIs (LIWZX ForceXForm:$src), 1),
+  (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
+defm : ScalToVecWPermute<
+  v4f32, (f32 (load ForceXForm:$src)),
+  (XXSLDWIs (LIWZX ForceXForm:$src), 1),
+  (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
 } // HasVSX, HasP8Vector, IsBigEndian
 
 // Big endian Power8 64Bit VSX subtarget.
@@ -3381,14 +3390,6 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))),
           (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>;
 def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))),
           (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>;
-defm : ScalToVecWPermute<
-  v4i32, (i32 (load ForceXForm:$src)),
-  (XXSLDWIs (LIWZX ForceXForm:$src), 1),
-  (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
-defm : ScalToVecWPermute<
-  v4f32, (f32 (load ForceXForm:$src)),
-  (XXSLDWIs (LIWZX ForceXForm:$src), 1),
-  (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
 
 def : Pat<DWToSPExtractConv.BVU,
           (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),

diff  --git a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
index 71d70167b12c1..b0716a57b318d 100644
--- a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
+++ b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
@@ -30,18 +30,15 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
 ;
 ; AIX-P8-32-LABEL: test_f2:
 ; AIX-P8-32:       # %bb.0:
-; AIX-P8-32-NEXT:    lfs f0, 4(r3)
-; AIX-P8-32-NEXT:    lfs f1, 0(r3)
 ; AIX-P8-32-NEXT:    lwz r6, L..C0(r2) # %const.0
-; AIX-P8-32-NEXT:    lfs f2, 4(r4)
-; AIX-P8-32-NEXT:    xscvdpspn v2, f0
-; AIX-P8-32-NEXT:    lfs f0, 0(r4)
-; AIX-P8-32-NEXT:    lxvw4x v0, 0, r6
-; AIX-P8-32-NEXT:    xscvdpspn v3, f1
-; AIX-P8-32-NEXT:    xscvdpspn v4, f2
-; AIX-P8-32-NEXT:    xscvdpspn v5, f0
-; AIX-P8-32-NEXT:    vperm v2, v3, v2, v0
-; AIX-P8-32-NEXT:    vperm v3, v5, v4, v0
+; AIX-P8-32-NEXT:    li r7, 4
+; AIX-P8-32-NEXT:    lxsiwzx v3, 0, r3
+; AIX-P8-32-NEXT:    lxsiwzx v0, 0, r4
+; AIX-P8-32-NEXT:    lxsiwzx v2, r3, r7
+; AIX-P8-32-NEXT:    lxsiwzx v5, r4, r7
+; AIX-P8-32-NEXT:    lxvw4x v4, 0, r6
+; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
+; AIX-P8-32-NEXT:    vperm v3, v0, v5, v4
 ; AIX-P8-32-NEXT:    xvaddsp vs0, v2, v3
 ; AIX-P8-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-P8-32-NEXT:    xscvspdpn f0, vs0
@@ -60,15 +57,15 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
 ;
 ; AIX-P9-32-LABEL: test_f2:
 ; AIX-P9-32:       # %bb.0:
-; AIX-P9-32-NEXT:    lfs f0, 0(r3)
+; AIX-P9-32-NEXT:    lfiwzx f0, 0, r3
 ; AIX-P9-32-NEXT:    lwz r3, 4(r3)
+; AIX-P9-32-NEXT:    xxsldwi vs0, f0, f0, 1
 ; AIX-P9-32-NEXT:    mtfprwz f1, r3
 ; AIX-P9-32-NEXT:    lwz r3, 4(r4)
-; AIX-P9-32-NEXT:    xscvdpspn vs0, f0
-; AIX-P9-32-NEXT:    mtfprwz f2, r3
 ; AIX-P9-32-NEXT:    xxinsertw vs0, vs1, 4
-; AIX-P9-32-NEXT:    lfs f1, 0(r4)
-; AIX-P9-32-NEXT:    xscvdpspn vs1, f1
+; AIX-P9-32-NEXT:    lfiwzx f1, 0, r4
+; AIX-P9-32-NEXT:    mtfprwz f2, r3
+; AIX-P9-32-NEXT:    xxsldwi vs1, f1, f1, 1
 ; AIX-P9-32-NEXT:    xxinsertw vs1, vs2, 4
 ; AIX-P9-32-NEXT:    xvaddsp vs0, vs0, vs1
 ; AIX-P9-32-NEXT:    xscvspdpn f1, vs0

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 1d424446db7c0..43400d458485d 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -974,8 +974,7 @@ define dso_local <4 x i32> @testSplat4hi(<8 x i8>* nocapture readonly %ptr) loca
 ;
 ; P8-AIX-32-LABEL: testSplat4hi:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    mtfprwz f0, r3
+; P8-AIX-32-NEXT:    lfiwzx f0, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw v2, vs0, 1
 ; P8-AIX-32-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll
index 0b3e75e1c370d..7d97598a66b86 100644
--- a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll
+++ b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll
@@ -43,17 +43,13 @@ float* nocapture readonly %d) {
 
 ; CHECK-BE-AIX-32-LABEL: vector_gatherf:
 ; CHECK-BE-AIX-32-LABEL: # %bb.0: # %entry
-; CHECK-BE-AIX-32-DAG: lfs f[[REG0:[0-9]+]]
-; CHECK-BE-AIX-32-DAG: lfs f[[REG1:[0-9]+]]
-; CHECK-BE-AIX-32-DAG: lfs f[[REG2:[0-9]+]]
-; CHECK-BE-AIX-32-DAG: lfs f[[REG3:[0-9]+]]
-; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG0]]
-; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG1:[0-9]+]], f[[REG1]]
-; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG2:[0-9]+]], f[[REG2]]
-; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG3]]
-; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG1]], v[[VREG0]], v[[VREG1]]
-; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG0]], v[[VREG2]], v[[VREG0]]
-; CHECK-BE-AIX-32-NEXT: xxmrghd v[[VREG1]], v[[VREG0]], v[[VREG1]]
+; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG0:[0-9]+]]
+; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG1:[0-9]+]]
+; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG2:[0-9]+]]
+; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG3:[0-9]+]]
+; CHECK-BE-AIX-32-DAG: vmrgow v[[REG0]], v[[REG1]], v[[REG0]]
+; CHECK-BE-AIX-32-DAG: vmrgow v[[REG3]], v[[REG2]], v[[REG3]]
+; CHECK-BE-AIX-32-NEXT: xxmrghd v[[REG0]], v[[REG3]], v[[REG0]]
 ; CHECK-BE-AIX-32-NEXT: blr
 entry:
   %0 = load float, float* %a, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index 469a56dfac1a8..699f5a8c60b7b 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -560,8 +560,7 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
 ;
 ; P8-AIX32-LABEL: unadjusted_lxvwsx:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r3, 0(r3)
-; P8-AIX32-NEXT:    mtfprwz f0, r3
+; P8-AIX32-NEXT:    lfiwzx f0, 0, r3
 ; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
 ; P8-AIX32-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
index 8160263792246..d332f548d33ba 100644
--- a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
+++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
@@ -28,8 +28,7 @@ define <16 x i8> @test(i32* %s, i32* %t) {
 ;
 ; CHECK-AIX-32-LABEL: test:
 ; CHECK-AIX-32:       # %bb.0: # %entry
-; CHECK-AIX-32-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-32-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-AIX-32-NEXT:    blr
 

diff  --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index f3959c3c8ec90..d170dc7e41537 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -66,9 +66,8 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE: lxsiwzx [[REG:[0-9]+]]
 ; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]]
 ; P9BE-32-LABEL: test32:
-; P9BE-32: lwzx [[REG1:[0-9]+]]
-; P9BE-32: mtvsrwz [[REG2:[0-9]+]], [[REG1]]
-; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
+; P9BE-32: lxsiwzx [[REG:[0-9]+]]
+; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]]
 entry:
   %idx.ext63 = sext i32 %i_pix2 to i64
   %add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63

diff  --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
index d7883b540d892..5034778592a5c 100644
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -68,18 +68,15 @@ define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x fl
 ;
 ; AIX-32-LABEL: test2:
 ; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    lfs f0, 4(r3)
-; AIX-32-NEXT:    lfs f1, 0(r3)
 ; AIX-32-NEXT:    lwz r5, L..C0(r2) # %const.0
-; AIX-32-NEXT:    lfs f2, 4(r4)
-; AIX-32-NEXT:    xscvdpspn v2, f0
-; AIX-32-NEXT:    lfs f0, 0(r4)
-; AIX-32-NEXT:    lxvw4x v0, 0, r5
-; AIX-32-NEXT:    xscvdpspn v3, f1
-; AIX-32-NEXT:    xscvdpspn v4, f2
-; AIX-32-NEXT:    xscvdpspn v5, f0
-; AIX-32-NEXT:    vperm v2, v3, v2, v0
-; AIX-32-NEXT:    vperm v3, v5, v4, v0
+; AIX-32-NEXT:    li r6, 4
+; AIX-32-NEXT:    lxsiwzx v3, 0, r3
+; AIX-32-NEXT:    lxsiwzx v0, 0, r4
+; AIX-32-NEXT:    lxsiwzx v2, r3, r6
+; AIX-32-NEXT:    lxsiwzx v5, r4, r6
+; AIX-32-NEXT:    lxvw4x v4, 0, r5
+; AIX-32-NEXT:    vperm v2, v3, v2, v4
+; AIX-32-NEXT:    vperm v3, v0, v5, v4
 ; AIX-32-NEXT:    xvsubsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0
@@ -117,18 +114,15 @@ define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x fl
 ;
 ; AIX-32-LABEL: test3:
 ; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    lfs f0, 4(r3)
-; AIX-32-NEXT:    lfs f1, 0(r3)
 ; AIX-32-NEXT:    lwz r5, L..C1(r2) # %const.0
-; AIX-32-NEXT:    lfs f2, 4(r4)
-; AIX-32-NEXT:    xscvdpspn v2, f0
-; AIX-32-NEXT:    lfs f0, 0(r4)
-; AIX-32-NEXT:    lxvw4x v0, 0, r5
-; AIX-32-NEXT:    xscvdpspn v3, f1
-; AIX-32-NEXT:    xscvdpspn v4, f2
-; AIX-32-NEXT:    xscvdpspn v5, f0
-; AIX-32-NEXT:    vperm v2, v3, v2, v0
-; AIX-32-NEXT:    vperm v3, v5, v4, v0
+; AIX-32-NEXT:    li r6, 4
+; AIX-32-NEXT:    lxsiwzx v3, 0, r3
+; AIX-32-NEXT:    lxsiwzx v0, 0, r4
+; AIX-32-NEXT:    lxsiwzx v2, r3, r6
+; AIX-32-NEXT:    lxsiwzx v5, r4, r6
+; AIX-32-NEXT:    lxvw4x v4, 0, r5
+; AIX-32-NEXT:    vperm v2, v3, v2, v4
+; AIX-32-NEXT:    vperm v3, v0, v5, v4
 ; AIX-32-NEXT:    xvaddsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0
@@ -166,18 +160,15 @@ define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x fl
 ;
 ; AIX-32-LABEL: test4:
 ; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    lfs f0, 4(r3)
-; AIX-32-NEXT:    lfs f1, 0(r3)
 ; AIX-32-NEXT:    lwz r5, L..C2(r2) # %const.0
-; AIX-32-NEXT:    lfs f2, 4(r4)
-; AIX-32-NEXT:    xscvdpspn v2, f0
-; AIX-32-NEXT:    lfs f0, 0(r4)
-; AIX-32-NEXT:    lxvw4x v0, 0, r5
-; AIX-32-NEXT:    xscvdpspn v3, f1
-; AIX-32-NEXT:    xscvdpspn v4, f2
-; AIX-32-NEXT:    xscvdpspn v5, f0
-; AIX-32-NEXT:    vperm v2, v3, v2, v0
-; AIX-32-NEXT:    vperm v3, v5, v4, v0
+; AIX-32-NEXT:    li r6, 4
+; AIX-32-NEXT:    lxsiwzx v3, 0, r3
+; AIX-32-NEXT:    lxsiwzx v0, 0, r4
+; AIX-32-NEXT:    lxsiwzx v2, r3, r6
+; AIX-32-NEXT:    lxsiwzx v5, r4, r6
+; AIX-32-NEXT:    lxvw4x v4, 0, r5
+; AIX-32-NEXT:    vperm v2, v3, v2, v4
+; AIX-32-NEXT:    vperm v3, v0, v5, v4
 ; AIX-32-NEXT:    xvmulsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0

diff  --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index e0fd7526faaf9..87e4c7194d966 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -10,16 +10,16 @@
 
 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
 ; RUN:    -mtriple=powerpc64-ibm-aix-xcoff< %s | FileCheck %s \
-; RUN:    --check-prefixes=P9-AIX,P9-AIX-64
+; RUN:    --check-prefixes=AIX,P9-AIX,P9-AIX-64
 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
 ; RUN:    -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
-; RUN:    --check-prefixes=P9-AIX,P9-AIX-32
+; RUN:    --check-prefixes=AIX,P9-AIX,P9-AIX-32
 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
 ; RUN:    -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
-; RUN:    --check-prefixes=P8-AIX-64
+; RUN:    --check-prefixes=AIX,P8-AIX-64
 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
 ; RUN:    -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
-; RUN:    --check-prefixes=P8-AIX-32
+; RUN:    --check-prefixes=AIX,P8-AIX-32
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec)  {
@@ -422,9 +422,8 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
 ;
 ; P8-AIX-32-LABEL: s2v_test_f1:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lfs f0, 0(r3)
 ; P8-AIX-32-NEXT:    lwz r4, L..C5(r2) # %const.0
-; P8-AIX-32-NEXT:    xscvdpspn v3, f0
+; P8-AIX-32-NEXT:    lxsiwzx v3, 0, r3
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r4
 ; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
@@ -466,33 +465,12 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
 ; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
 ;
-; P9-AIX-64-LABEL: s2v_test_f2:
-; P9-AIX-64:       # %bb.0: # %entry
-; P9-AIX-64-NEXT:    addi r3, r3, 4
-; P9-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P9-AIX-64-NEXT:    vmrgow v2, v3, v2
-; P9-AIX-64-NEXT:    blr
-;
-; P9-AIX-32-LABEL: s2v_test_f2:
-; P9-AIX-32:       # %bb.0: # %entry
-; P9-AIX-32-NEXT:    lfs f0, 4(r3)
-; P9-AIX-32-NEXT:    xscvdpspn v3, f0
-; P9-AIX-32-NEXT:    vmrgow v2, v3, v2
-; P9-AIX-32-NEXT:    blr
-;
-; P8-AIX-64-LABEL: s2v_test_f2:
-; P8-AIX-64:       # %bb.0: # %entry
-; P8-AIX-64-NEXT:    addi r3, r3, 4
-; P8-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT:    vmrgow v2, v3, v2
-; P8-AIX-64-NEXT:    blr
-;
-; P8-AIX-32-LABEL: s2v_test_f2:
-; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lfs f0, 4(r3)
-; P8-AIX-32-NEXT:    xscvdpspn v3, f0
-; P8-AIX-32-NEXT:    vmrgow v2, v3, v2
-; P8-AIX-32-NEXT:    blr
+; AIX-LABEL: s2v_test_f2:
+; AIX:       # %bb.0: # %entry
+; AIX-NEXT:    addi r3, r3, 4
+; AIX-NEXT:    lxsiwzx v3, 0, r3
+; AIX-NEXT:    vmrgow v2, v3, v2
+; AIX-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %f64, i64 1
   %0 = load float, float* %arrayidx, align 8
@@ -542,8 +520,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
 ; P9-AIX-32-LABEL: s2v_test_f3:
 ; P9-AIX-32:       # %bb.0: # %entry
 ; P9-AIX-32-NEXT:    slwi r4, r4, 2
-; P9-AIX-32-NEXT:    lfsx f0, r3, r4
-; P9-AIX-32-NEXT:    xscvdpspn v3, f0
+; P9-AIX-32-NEXT:    lxsiwzx v3, r3, r4
 ; P9-AIX-32-NEXT:    vmrgow v2, v3, v2
 ; P9-AIX-32-NEXT:    blr
 ;
@@ -557,8 +534,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
 ; P8-AIX-32-LABEL: s2v_test_f3:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
-; P8-AIX-32-NEXT:    lfsx f0, r3, r4
-; P8-AIX-32-NEXT:    xscvdpspn v3, f0
+; P8-AIX-32-NEXT:    lxsiwzx v3, r3, r4
 ; P8-AIX-32-NEXT:    vmrgow v2, v3, v2
 ; P8-AIX-32-NEXT:    blr
 entry:
@@ -601,33 +577,12 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
 ; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
 ;
-; P9-AIX-64-LABEL: s2v_test_f4:
-; P9-AIX-64:       # %bb.0: # %entry
-; P9-AIX-64-NEXT:    addi r3, r3, 4
-; P9-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P9-AIX-64-NEXT:    vmrgow v2, v3, v2
-; P9-AIX-64-NEXT:    blr
-;
-; P9-AIX-32-LABEL: s2v_test_f4:
-; P9-AIX-32:       # %bb.0: # %entry
-; P9-AIX-32-NEXT:    lfs f0, 4(r3)
-; P9-AIX-32-NEXT:    xscvdpspn v3, f0
-; P9-AIX-32-NEXT:    vmrgow v2, v3, v2
-; P9-AIX-32-NEXT:    blr
-;
-; P8-AIX-64-LABEL: s2v_test_f4:
-; P8-AIX-64:       # %bb.0: # %entry
-; P8-AIX-64-NEXT:    addi r3, r3, 4
-; P8-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT:    vmrgow v2, v3, v2
-; P8-AIX-64-NEXT:    blr
-;
-; P8-AIX-32-LABEL: s2v_test_f4:
-; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lfs f0, 4(r3)
-; P8-AIX-32-NEXT:    xscvdpspn v3, f0
-; P8-AIX-32-NEXT:    vmrgow v2, v3, v2
-; P8-AIX-32-NEXT:    blr
+; AIX-LABEL: s2v_test_f4:
+; AIX:       # %bb.0: # %entry
+; AIX-NEXT:    addi r3, r3, 4
+; AIX-NEXT:    lxsiwzx v3, 0, r3
+; AIX-NEXT:    vmrgow v2, v3, v2
+; AIX-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %f64, i64 1
   %0 = load float, float* %arrayidx, align 8
@@ -663,31 +618,11 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
 ; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
 ;
-; P9-AIX-64-LABEL: s2v_test_f5:
-; P9-AIX-64:       # %bb.0: # %entry
-; P9-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P9-AIX-64-NEXT:    vmrgow v2, v3, v2
-; P9-AIX-64-NEXT:    blr
-;
-; P9-AIX-32-LABEL: s2v_test_f5:
-; P9-AIX-32:       # %bb.0: # %entry
-; P9-AIX-32-NEXT:    lfs f0, 0(r3)
-; P9-AIX-32-NEXT:    xscvdpspn v3, f0
-; P9-AIX-32-NEXT:    vmrgow v2, v3, v2
-; P9-AIX-32-NEXT:    blr
-;
-; P8-AIX-64-LABEL: s2v_test_f5:
-; P8-AIX-64:       # %bb.0: # %entry
-; P8-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT:    vmrgow v2, v3, v2
-; P8-AIX-64-NEXT:    blr
-;
-; P8-AIX-32-LABEL: s2v_test_f5:
-; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lfs f0, 0(r3)
-; P8-AIX-32-NEXT:    xscvdpspn v3, f0
-; P8-AIX-32-NEXT:    vmrgow v2, v3, v2
-; P8-AIX-32-NEXT:    blr
+; AIX-LABEL: s2v_test_f5:
+; AIX:       # %bb.0: # %entry
+; AIX-NEXT:    lxsiwzx v3, 0, r3
+; AIX-NEXT:    vmrgow v2, v3, v2
+; AIX-NEXT:    blr
 entry:
   %0 = load float, float* %ptr1, align 8
   %vecins = insertelement <2 x float> %vec, float %0, i32 0

diff  --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
index 225367f5a886d..dd873aad5ec82 100644
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -659,15 +659,14 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
 ;
 ; AIX-P8-32-LABEL: testFloatImm2:
 ; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    lfs f0, 0(r3)
 ; AIX-P8-32-NEXT:    lwz r4, L..C8(r2) # %const.0
-; AIX-P8-32-NEXT:    xscvdpspn v3, f0
+; AIX-P8-32-NEXT:    lxsiwzx v3, 0, r3
+; AIX-P8-32-NEXT:    li r5, 4
 ; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
-; AIX-P8-32-NEXT:    lfs f0, 4(r3)
-; AIX-P8-32-NEXT:    lwz r3, L..C9(r2) # %const.1
+; AIX-P8-32-NEXT:    lwz r4, L..C9(r2) # %const.1
 ; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
-; AIX-P8-32-NEXT:    xscvdpspn v3, f0
+; AIX-P8-32-NEXT:    lxsiwzx v3, r3, r5
+; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
 ; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
 ; AIX-P8-32-NEXT:    blr
 entry:
@@ -732,17 +731,15 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
 ;
 ; AIX-P8-32-LABEL: testFloatImm3:
 ; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    lis r4, 4
-; AIX-P8-32-NEXT:    lfsx f0, r3, r4
 ; AIX-P8-32-NEXT:    lwz r4, L..C10(r2) # %const.0
-; AIX-P8-32-NEXT:    xscvdpspn v3, f0
+; AIX-P8-32-NEXT:    lis r5, 4
+; AIX-P8-32-NEXT:    lxsiwzx v3, r3, r5
 ; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
-; AIX-P8-32-NEXT:    lfs f0, 0(r3)
-; AIX-P8-32-NEXT:    lwz r3, L..C11(r2) # %const.1
+; AIX-P8-32-NEXT:    lwz r4, L..C11(r2) # %const.1
 ; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
-; AIX-P8-32-NEXT:    xscvdpspn v3, f0
-; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
+; AIX-P8-32-NEXT:    lxvw4x v3, 0, r4
+; AIX-P8-32-NEXT:    lxsiwzx v4, 0, r3
+; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-32-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536


        


More information about the llvm-commits mailing list