[llvm] af43094 - [PowerPC][AIX] Allow VSX patterns to be 32-bit and 64-bit safe on P8+.
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Fri May 27 08:34:34 PDT 2022
Author: Amy Kwan
Date: 2022-05-27T10:34:17-05:00
New Revision: af430944b3ba8ca55c4fd6b73f53c198c469ffee
URL: https://github.com/llvm/llvm-project/commit/af430944b3ba8ca55c4fd6b73f53c198c469ffee
DIFF: https://github.com/llvm/llvm-project/commit/af430944b3ba8ca55c4fd6b73f53c198c469ffee.diff
LOG: [PowerPC][AIX] Allow VSX patterns to be 32-bit and 64-bit safe on P8+.
This patch updates two patterns involving `scalar_to_vector` and
`SCALAR_TO_VECTOR_PERMUTED` nodes to be safe for both 64-bit and 32-bit
targets by pulling the patterns out of the 64-bit-specific guard. These
patterns are matched on POWER8 and above.
Differential Revision: https://reviews.llvm.org/D125389
Added:
Modified:
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
llvm/test/CodeGen/PowerPC/float-vector-gather.ll
llvm/test/CodeGen/PowerPC/load-and-splat.ll
llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 1e87af516e52b..6e562498dcf90 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3367,6 +3367,15 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
(f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
+
+defm : ScalToVecWPermute<
+ v4i32, (i32 (load ForceXForm:$src)),
+ (XXSLDWIs (LIWZX ForceXForm:$src), 1),
+ (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v4f32, (f32 (load ForceXForm:$src)),
+ (XXSLDWIs (LIWZX ForceXForm:$src), 1),
+ (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
} // HasVSX, HasP8Vector, IsBigEndian
// Big endian Power8 64Bit VSX subtarget.
@@ -3381,14 +3390,6 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))),
(v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))),
(v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>;
-defm : ScalToVecWPermute<
- v4i32, (i32 (load ForceXForm:$src)),
- (XXSLDWIs (LIWZX ForceXForm:$src), 1),
- (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
-defm : ScalToVecWPermute<
- v4f32, (f32 (load ForceXForm:$src)),
- (XXSLDWIs (LIWZX ForceXForm:$src), 1),
- (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
def : Pat<DWToSPExtractConv.BVU,
(v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
diff --git a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
index 71d70167b12c1..b0716a57b318d 100644
--- a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
+++ b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
@@ -30,18 +30,15 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
;
; AIX-P8-32-LABEL: test_f2:
; AIX-P8-32: # %bb.0:
-; AIX-P8-32-NEXT: lfs f0, 4(r3)
-; AIX-P8-32-NEXT: lfs f1, 0(r3)
; AIX-P8-32-NEXT: lwz r6, L..C0(r2) # %const.0
-; AIX-P8-32-NEXT: lfs f2, 4(r4)
-; AIX-P8-32-NEXT: xscvdpspn v2, f0
-; AIX-P8-32-NEXT: lfs f0, 0(r4)
-; AIX-P8-32-NEXT: lxvw4x v0, 0, r6
-; AIX-P8-32-NEXT: xscvdpspn v3, f1
-; AIX-P8-32-NEXT: xscvdpspn v4, f2
-; AIX-P8-32-NEXT: xscvdpspn v5, f0
-; AIX-P8-32-NEXT: vperm v2, v3, v2, v0
-; AIX-P8-32-NEXT: vperm v3, v5, v4, v0
+; AIX-P8-32-NEXT: li r7, 4
+; AIX-P8-32-NEXT: lxsiwzx v3, 0, r3
+; AIX-P8-32-NEXT: lxsiwzx v0, 0, r4
+; AIX-P8-32-NEXT: lxsiwzx v2, r3, r7
+; AIX-P8-32-NEXT: lxsiwzx v5, r4, r7
+; AIX-P8-32-NEXT: lxvw4x v4, 0, r6
+; AIX-P8-32-NEXT: vperm v2, v3, v2, v4
+; AIX-P8-32-NEXT: vperm v3, v0, v5, v4
; AIX-P8-32-NEXT: xvaddsp vs0, v2, v3
; AIX-P8-32-NEXT: xxsldwi vs1, vs0, vs0, 1
; AIX-P8-32-NEXT: xscvspdpn f0, vs0
@@ -60,15 +57,15 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
;
; AIX-P9-32-LABEL: test_f2:
; AIX-P9-32: # %bb.0:
-; AIX-P9-32-NEXT: lfs f0, 0(r3)
+; AIX-P9-32-NEXT: lfiwzx f0, 0, r3
; AIX-P9-32-NEXT: lwz r3, 4(r3)
+; AIX-P9-32-NEXT: xxsldwi vs0, f0, f0, 1
; AIX-P9-32-NEXT: mtfprwz f1, r3
; AIX-P9-32-NEXT: lwz r3, 4(r4)
-; AIX-P9-32-NEXT: xscvdpspn vs0, f0
-; AIX-P9-32-NEXT: mtfprwz f2, r3
; AIX-P9-32-NEXT: xxinsertw vs0, vs1, 4
-; AIX-P9-32-NEXT: lfs f1, 0(r4)
-; AIX-P9-32-NEXT: xscvdpspn vs1, f1
+; AIX-P9-32-NEXT: lfiwzx f1, 0, r4
+; AIX-P9-32-NEXT: mtfprwz f2, r3
+; AIX-P9-32-NEXT: xxsldwi vs1, f1, f1, 1
; AIX-P9-32-NEXT: xxinsertw vs1, vs2, 4
; AIX-P9-32-NEXT: xvaddsp vs0, vs0, vs1
; AIX-P9-32-NEXT: xscvspdpn f1, vs0
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 1d424446db7c0..43400d458485d 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -974,8 +974,7 @@ define dso_local <4 x i32> @testSplat4hi(<8 x i8>* nocapture readonly %ptr) loca
;
; P8-AIX-32-LABEL: testSplat4hi:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r3, 0(r3)
-; P8-AIX-32-NEXT: mtfprwz f0, r3
+; P8-AIX-32-NEXT: lfiwzx f0, 0, r3
; P8-AIX-32-NEXT: xxspltw v2, vs0, 1
; P8-AIX-32-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll
index 0b3e75e1c370d..7d97598a66b86 100644
--- a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll
+++ b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll
@@ -43,17 +43,13 @@ float* nocapture readonly %d) {
; CHECK-BE-AIX-32-LABEL: vector_gatherf:
; CHECK-BE-AIX-32-LABEL: # %bb.0: # %entry
-; CHECK-BE-AIX-32-DAG: lfs f[[REG0:[0-9]+]]
-; CHECK-BE-AIX-32-DAG: lfs f[[REG1:[0-9]+]]
-; CHECK-BE-AIX-32-DAG: lfs f[[REG2:[0-9]+]]
-; CHECK-BE-AIX-32-DAG: lfs f[[REG3:[0-9]+]]
-; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG0]]
-; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG1:[0-9]+]], f[[REG1]]
-; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG2:[0-9]+]], f[[REG2]]
-; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG3]]
-; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG1]], v[[VREG0]], v[[VREG1]]
-; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG0]], v[[VREG2]], v[[VREG0]]
-; CHECK-BE-AIX-32-NEXT: xxmrghd v[[VREG1]], v[[VREG0]], v[[VREG1]]
+; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG0:[0-9]+]]
+; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG1:[0-9]+]]
+; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG2:[0-9]+]]
+; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG3:[0-9]+]]
+; CHECK-BE-AIX-32-DAG: vmrgow v[[REG0]], v[[REG1]], v[[REG0]]
+; CHECK-BE-AIX-32-DAG: vmrgow v[[REG3]], v[[REG2]], v[[REG3]]
+; CHECK-BE-AIX-32-NEXT: xxmrghd v[[REG0]], v[[REG3]], v[[REG0]]
; CHECK-BE-AIX-32-NEXT: blr
entry:
%0 = load float, float* %a, align 4
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index 469a56dfac1a8..699f5a8c60b7b 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -560,8 +560,7 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
;
; P8-AIX32-LABEL: unadjusted_lxvwsx:
; P8-AIX32: # %bb.0: # %entry
-; P8-AIX32-NEXT: lwz r3, 0(r3)
-; P8-AIX32-NEXT: mtfprwz f0, r3
+; P8-AIX32-NEXT: lfiwzx f0, 0, r3
; P8-AIX32-NEXT: xxspltw v2, vs0, 1
; P8-AIX32-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
index 8160263792246..d332f548d33ba 100644
--- a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
+++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
@@ -28,8 +28,7 @@ define <16 x i8> @test(i32* %s, i32* %t) {
;
; CHECK-AIX-32-LABEL: test:
; CHECK-AIX-32: # %bb.0: # %entry
-; CHECK-AIX-32-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-NEXT: mtfprwz f0, r3
+; CHECK-AIX-32-NEXT: lfiwzx f0, 0, r3
; CHECK-AIX-32-NEXT: xxspltw v2, vs0, 1
; CHECK-AIX-32-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index f3959c3c8ec90..d170dc7e41537 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -66,9 +66,8 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE: lxsiwzx [[REG:[0-9]+]]
; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]]
; P9BE-32-LABEL: test32:
-; P9BE-32: lwzx [[REG1:[0-9]+]]
-; P9BE-32: mtvsrwz [[REG2:[0-9]+]], [[REG1]]
-; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
+; P9BE-32: lxsiwzx [[REG:[0-9]+]]
+; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]]
entry:
%idx.ext63 = sext i32 %i_pix2 to i64
%add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63
diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
index d7883b540d892..5034778592a5c 100644
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -68,18 +68,15 @@ define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x fl
;
; AIX-32-LABEL: test2:
; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: lfs f0, 4(r3)
-; AIX-32-NEXT: lfs f1, 0(r3)
; AIX-32-NEXT: lwz r5, L..C0(r2) # %const.0
-; AIX-32-NEXT: lfs f2, 4(r4)
-; AIX-32-NEXT: xscvdpspn v2, f0
-; AIX-32-NEXT: lfs f0, 0(r4)
-; AIX-32-NEXT: lxvw4x v0, 0, r5
-; AIX-32-NEXT: xscvdpspn v3, f1
-; AIX-32-NEXT: xscvdpspn v4, f2
-; AIX-32-NEXT: xscvdpspn v5, f0
-; AIX-32-NEXT: vperm v2, v3, v2, v0
-; AIX-32-NEXT: vperm v3, v5, v4, v0
+; AIX-32-NEXT: li r6, 4
+; AIX-32-NEXT: lxsiwzx v3, 0, r3
+; AIX-32-NEXT: lxsiwzx v0, 0, r4
+; AIX-32-NEXT: lxsiwzx v2, r3, r6
+; AIX-32-NEXT: lxsiwzx v5, r4, r6
+; AIX-32-NEXT: lxvw4x v4, 0, r5
+; AIX-32-NEXT: vperm v2, v3, v2, v4
+; AIX-32-NEXT: vperm v3, v0, v5, v4
; AIX-32-NEXT: xvsubsp vs0, v2, v3
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
; AIX-32-NEXT: xscvspdpn f0, vs0
@@ -117,18 +114,15 @@ define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x fl
;
; AIX-32-LABEL: test3:
; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: lfs f0, 4(r3)
-; AIX-32-NEXT: lfs f1, 0(r3)
; AIX-32-NEXT: lwz r5, L..C1(r2) # %const.0
-; AIX-32-NEXT: lfs f2, 4(r4)
-; AIX-32-NEXT: xscvdpspn v2, f0
-; AIX-32-NEXT: lfs f0, 0(r4)
-; AIX-32-NEXT: lxvw4x v0, 0, r5
-; AIX-32-NEXT: xscvdpspn v3, f1
-; AIX-32-NEXT: xscvdpspn v4, f2
-; AIX-32-NEXT: xscvdpspn v5, f0
-; AIX-32-NEXT: vperm v2, v3, v2, v0
-; AIX-32-NEXT: vperm v3, v5, v4, v0
+; AIX-32-NEXT: li r6, 4
+; AIX-32-NEXT: lxsiwzx v3, 0, r3
+; AIX-32-NEXT: lxsiwzx v0, 0, r4
+; AIX-32-NEXT: lxsiwzx v2, r3, r6
+; AIX-32-NEXT: lxsiwzx v5, r4, r6
+; AIX-32-NEXT: lxvw4x v4, 0, r5
+; AIX-32-NEXT: vperm v2, v3, v2, v4
+; AIX-32-NEXT: vperm v3, v0, v5, v4
; AIX-32-NEXT: xvaddsp vs0, v2, v3
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
; AIX-32-NEXT: xscvspdpn f0, vs0
@@ -166,18 +160,15 @@ define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x fl
;
; AIX-32-LABEL: test4:
; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: lfs f0, 4(r3)
-; AIX-32-NEXT: lfs f1, 0(r3)
; AIX-32-NEXT: lwz r5, L..C2(r2) # %const.0
-; AIX-32-NEXT: lfs f2, 4(r4)
-; AIX-32-NEXT: xscvdpspn v2, f0
-; AIX-32-NEXT: lfs f0, 0(r4)
-; AIX-32-NEXT: lxvw4x v0, 0, r5
-; AIX-32-NEXT: xscvdpspn v3, f1
-; AIX-32-NEXT: xscvdpspn v4, f2
-; AIX-32-NEXT: xscvdpspn v5, f0
-; AIX-32-NEXT: vperm v2, v3, v2, v0
-; AIX-32-NEXT: vperm v3, v5, v4, v0
+; AIX-32-NEXT: li r6, 4
+; AIX-32-NEXT: lxsiwzx v3, 0, r3
+; AIX-32-NEXT: lxsiwzx v0, 0, r4
+; AIX-32-NEXT: lxsiwzx v2, r3, r6
+; AIX-32-NEXT: lxsiwzx v5, r4, r6
+; AIX-32-NEXT: lxvw4x v4, 0, r5
+; AIX-32-NEXT: vperm v2, v3, v2, v4
+; AIX-32-NEXT: vperm v3, v0, v5, v4
; AIX-32-NEXT: xvmulsp vs0, v2, v3
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
; AIX-32-NEXT: xscvspdpn f0, vs0
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index e0fd7526faaf9..87e4c7194d966 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -10,16 +10,16 @@
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-ibm-aix-xcoff< %s | FileCheck %s \
-; RUN: --check-prefixes=P9-AIX,P9-AIX-64
+; RUN: --check-prefixes=AIX,P9-AIX,P9-AIX-64
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
-; RUN: --check-prefixes=P9-AIX,P9-AIX-32
+; RUN: --check-prefixes=AIX,P9-AIX,P9-AIX-32
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
-; RUN: --check-prefixes=P8-AIX-64
+; RUN: --check-prefixes=AIX,P8-AIX-64
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
-; RUN: --check-prefixes=P8-AIX-32
+; RUN: --check-prefixes=AIX,P8-AIX-32
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
@@ -422,9 +422,8 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
;
; P8-AIX-32-LABEL: s2v_test_f1:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lfs f0, 0(r3)
; P8-AIX-32-NEXT: lwz r4, L..C5(r2) # %const.0
-; P8-AIX-32-NEXT: xscvdpspn v3, f0
+; P8-AIX-32-NEXT: lxsiwzx v3, 0, r3
; P8-AIX-32-NEXT: lxvw4x v4, 0, r4
; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
; P8-AIX-32-NEXT: blr
@@ -466,33 +465,12 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
; P8BE-NEXT: vmrgow v2, v3, v2
; P8BE-NEXT: blr
;
-; P9-AIX-64-LABEL: s2v_test_f2:
-; P9-AIX-64: # %bb.0: # %entry
-; P9-AIX-64-NEXT: addi r3, r3, 4
-; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3
-; P9-AIX-64-NEXT: vmrgow v2, v3, v2
-; P9-AIX-64-NEXT: blr
-;
-; P9-AIX-32-LABEL: s2v_test_f2:
-; P9-AIX-32: # %bb.0: # %entry
-; P9-AIX-32-NEXT: lfs f0, 4(r3)
-; P9-AIX-32-NEXT: xscvdpspn v3, f0
-; P9-AIX-32-NEXT: vmrgow v2, v3, v2
-; P9-AIX-32-NEXT: blr
-;
-; P8-AIX-64-LABEL: s2v_test_f2:
-; P8-AIX-64: # %bb.0: # %entry
-; P8-AIX-64-NEXT: addi r3, r3, 4
-; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT: vmrgow v2, v3, v2
-; P8-AIX-64-NEXT: blr
-;
-; P8-AIX-32-LABEL: s2v_test_f2:
-; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lfs f0, 4(r3)
-; P8-AIX-32-NEXT: xscvdpspn v3, f0
-; P8-AIX-32-NEXT: vmrgow v2, v3, v2
-; P8-AIX-32-NEXT: blr
+; AIX-LABEL: s2v_test_f2:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: addi r3, r3, 4
+; AIX-NEXT: lxsiwzx v3, 0, r3
+; AIX-NEXT: vmrgow v2, v3, v2
+; AIX-NEXT: blr
entry:
%arrayidx = getelementptr inbounds float, float* %f64, i64 1
%0 = load float, float* %arrayidx, align 8
@@ -542,8 +520,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
; P9-AIX-32-LABEL: s2v_test_f3:
; P9-AIX-32: # %bb.0: # %entry
; P9-AIX-32-NEXT: slwi r4, r4, 2
-; P9-AIX-32-NEXT: lfsx f0, r3, r4
-; P9-AIX-32-NEXT: xscvdpspn v3, f0
+; P9-AIX-32-NEXT: lxsiwzx v3, r3, r4
; P9-AIX-32-NEXT: vmrgow v2, v3, v2
; P9-AIX-32-NEXT: blr
;
@@ -557,8 +534,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
; P8-AIX-32-LABEL: s2v_test_f3:
; P8-AIX-32: # %bb.0: # %entry
; P8-AIX-32-NEXT: slwi r4, r4, 2
-; P8-AIX-32-NEXT: lfsx f0, r3, r4
-; P8-AIX-32-NEXT: xscvdpspn v3, f0
+; P8-AIX-32-NEXT: lxsiwzx v3, r3, r4
; P8-AIX-32-NEXT: vmrgow v2, v3, v2
; P8-AIX-32-NEXT: blr
entry:
@@ -601,33 +577,12 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
; P8BE-NEXT: vmrgow v2, v3, v2
; P8BE-NEXT: blr
;
-; P9-AIX-64-LABEL: s2v_test_f4:
-; P9-AIX-64: # %bb.0: # %entry
-; P9-AIX-64-NEXT: addi r3, r3, 4
-; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3
-; P9-AIX-64-NEXT: vmrgow v2, v3, v2
-; P9-AIX-64-NEXT: blr
-;
-; P9-AIX-32-LABEL: s2v_test_f4:
-; P9-AIX-32: # %bb.0: # %entry
-; P9-AIX-32-NEXT: lfs f0, 4(r3)
-; P9-AIX-32-NEXT: xscvdpspn v3, f0
-; P9-AIX-32-NEXT: vmrgow v2, v3, v2
-; P9-AIX-32-NEXT: blr
-;
-; P8-AIX-64-LABEL: s2v_test_f4:
-; P8-AIX-64: # %bb.0: # %entry
-; P8-AIX-64-NEXT: addi r3, r3, 4
-; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT: vmrgow v2, v3, v2
-; P8-AIX-64-NEXT: blr
-;
-; P8-AIX-32-LABEL: s2v_test_f4:
-; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lfs f0, 4(r3)
-; P8-AIX-32-NEXT: xscvdpspn v3, f0
-; P8-AIX-32-NEXT: vmrgow v2, v3, v2
-; P8-AIX-32-NEXT: blr
+; AIX-LABEL: s2v_test_f4:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: addi r3, r3, 4
+; AIX-NEXT: lxsiwzx v3, 0, r3
+; AIX-NEXT: vmrgow v2, v3, v2
+; AIX-NEXT: blr
entry:
%arrayidx = getelementptr inbounds float, float* %f64, i64 1
%0 = load float, float* %arrayidx, align 8
@@ -663,31 +618,11 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
; P8BE-NEXT: vmrgow v2, v3, v2
; P8BE-NEXT: blr
;
-; P9-AIX-64-LABEL: s2v_test_f5:
-; P9-AIX-64: # %bb.0: # %entry
-; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3
-; P9-AIX-64-NEXT: vmrgow v2, v3, v2
-; P9-AIX-64-NEXT: blr
-;
-; P9-AIX-32-LABEL: s2v_test_f5:
-; P9-AIX-32: # %bb.0: # %entry
-; P9-AIX-32-NEXT: lfs f0, 0(r3)
-; P9-AIX-32-NEXT: xscvdpspn v3, f0
-; P9-AIX-32-NEXT: vmrgow v2, v3, v2
-; P9-AIX-32-NEXT: blr
-;
-; P8-AIX-64-LABEL: s2v_test_f5:
-; P8-AIX-64: # %bb.0: # %entry
-; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT: vmrgow v2, v3, v2
-; P8-AIX-64-NEXT: blr
-;
-; P8-AIX-32-LABEL: s2v_test_f5:
-; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lfs f0, 0(r3)
-; P8-AIX-32-NEXT: xscvdpspn v3, f0
-; P8-AIX-32-NEXT: vmrgow v2, v3, v2
-; P8-AIX-32-NEXT: blr
+; AIX-LABEL: s2v_test_f5:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: lxsiwzx v3, 0, r3
+; AIX-NEXT: vmrgow v2, v3, v2
+; AIX-NEXT: blr
entry:
%0 = load float, float* %ptr1, align 8
%vecins = insertelement <2 x float> %vec, float %0, i32 0
diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
index 225367f5a886d..dd873aad5ec82 100644
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -659,15 +659,14 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
;
; AIX-P8-32-LABEL: testFloatImm2:
; AIX-P8-32: # %bb.0: # %entry
-; AIX-P8-32-NEXT: lfs f0, 0(r3)
; AIX-P8-32-NEXT: lwz r4, L..C8(r2) # %const.0
-; AIX-P8-32-NEXT: xscvdpspn v3, f0
+; AIX-P8-32-NEXT: lxsiwzx v3, 0, r3
+; AIX-P8-32-NEXT: li r5, 4
; AIX-P8-32-NEXT: lxvw4x v4, 0, r4
-; AIX-P8-32-NEXT: lfs f0, 4(r3)
-; AIX-P8-32-NEXT: lwz r3, L..C9(r2) # %const.1
+; AIX-P8-32-NEXT: lwz r4, L..C9(r2) # %const.1
; AIX-P8-32-NEXT: vperm v2, v3, v2, v4
-; AIX-P8-32-NEXT: lxvw4x v4, 0, r3
-; AIX-P8-32-NEXT: xscvdpspn v3, f0
+; AIX-P8-32-NEXT: lxsiwzx v3, r3, r5
+; AIX-P8-32-NEXT: lxvw4x v4, 0, r4
; AIX-P8-32-NEXT: vperm v2, v2, v3, v4
; AIX-P8-32-NEXT: blr
entry:
@@ -732,17 +731,15 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
;
; AIX-P8-32-LABEL: testFloatImm3:
; AIX-P8-32: # %bb.0: # %entry
-; AIX-P8-32-NEXT: lis r4, 4
-; AIX-P8-32-NEXT: lfsx f0, r3, r4
; AIX-P8-32-NEXT: lwz r4, L..C10(r2) # %const.0
-; AIX-P8-32-NEXT: xscvdpspn v3, f0
+; AIX-P8-32-NEXT: lis r5, 4
+; AIX-P8-32-NEXT: lxsiwzx v3, r3, r5
; AIX-P8-32-NEXT: lxvw4x v4, 0, r4
-; AIX-P8-32-NEXT: lfs f0, 0(r3)
-; AIX-P8-32-NEXT: lwz r3, L..C11(r2) # %const.1
+; AIX-P8-32-NEXT: lwz r4, L..C11(r2) # %const.1
; AIX-P8-32-NEXT: vperm v2, v3, v2, v4
-; AIX-P8-32-NEXT: lxvw4x v4, 0, r3
-; AIX-P8-32-NEXT: xscvdpspn v3, f0
-; AIX-P8-32-NEXT: vperm v2, v2, v3, v4
+; AIX-P8-32-NEXT: lxvw4x v3, 0, r4
+; AIX-P8-32-NEXT: lxsiwzx v4, 0, r3
+; AIX-P8-32-NEXT: vperm v2, v2, v4, v3
; AIX-P8-32-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
More information about the llvm-commits mailing list