[llvm] bfa9ce1 - [PowerPC] Improve handling of some BUILD_VECTOR nodes
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 23 15:43:44 PDT 2020
Author: Nemanja Ivanovic
Date: 2020-03-23T17:34:29-05:00
New Revision: bfa9ce1cb27a6abac071c0b8fab76d647098eaeb
URL: https://github.com/llvm/llvm-project/commit/bfa9ce1cb27a6abac071c0b8fab76d647098eaeb
DIFF: https://github.com/llvm/llvm-project/commit/bfa9ce1cb27a6abac071c0b8fab76d647098eaeb.diff
LOG: [PowerPC] Improve handling of some BUILD_VECTOR nodes
An analysis of real-world code turned up a number of BUILD_VECTOR patterns whose
operands are the results of operations on extracted vector elements, and for
which we currently produce poor code. This patch addresses those cases. No
attempt is made at completeness, as that would entail a large amount of work
for patterns that there is no evidence of in real code.
Differential revision: https://reviews.llvm.org/D72660
Added:
Modified:
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/build-vector-tests.ll
llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll
llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
llvm/test/CodeGen/PowerPC/vsx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index b12096dacdd3..73529533c26b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1341,6 +1341,21 @@ def DWToSPExtractConv {
dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
}
+def WToDPExtractConv {
+ dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
+ dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
+ dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
+ dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
+ dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
+ dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
+ dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
+ dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
+ dag BV02S = (v2f64 (build_vector El0S, El2S));
+ dag BV13S = (v2f64 (build_vector El1S, El3S));
+ dag BV02U = (v2f64 (build_vector El0U, El2U));
+ dag BV13U = (v2f64 (build_vector El1U, El3U));
+}
+
// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
we should define new patterns or otherwise match the same patterns
@@ -4171,6 +4186,41 @@ let AddedComplexity = 400 in {
def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
ExtDbl.B0U, ExtDbl.B1U)),
(v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 1))))),
+ (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 0))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
+ (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XVCVSPDP $A))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>; // BE: rotate left by 1 word -> [A1 A2 A3 A0]; XVCVSPDP converts words 0 and 2, i.e. elements 1 and 3 in order (a shift of 3 would yield 3,1).
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
+ (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$B, 0))))),
+ (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$B, 3))))),
+ (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
+ (XXPERMDI $A, $B, 3), 1)))>;
+ def : Pat<WToDPExtractConv.BV02S,
+ (v2f64 (XVCVSXWDP $A))>;
+ def : Pat<WToDPExtractConv.BV13S,
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>; // BE: rotate left by 1 word -> [A1 A2 A3 A0]; XVCVSXWDP converts words 0 and 2, i.e. elements 1 and 3 in order (a shift of 3 would yield 3,1).
+ def : Pat<WToDPExtractConv.BV02U,
+ (v2f64 (XVCVUXWDP $A))>;
+ def : Pat<WToDPExtractConv.BV13U,
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>; // BE: rotate left by 1 word -> [A1 A2 A3 A0]; XVCVUXWDP converts words 0 and 2, i.e. elements 1 and 3 in order (a shift of 3 would yield 3,1).
}
let Predicates = [IsLittleEndian, HasP8Vector] in {
@@ -4249,6 +4299,41 @@ let AddedComplexity = 400 in {
def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
ExtDbl.B0U, ExtDbl.B1U)),
(v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 1))))),
+ (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 0))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
+ (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP $A))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
+ (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$B, 0))))),
+ (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3),
+ (XXPERMDI $B, $A, 3), 1)))>;
+ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$B, 3))))),
+ (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
+ def : Pat<WToDPExtractConv.BV02S,
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
+ def : Pat<WToDPExtractConv.BV13S,
+ (v2f64 (XVCVSXWDP $A))>;
+ def : Pat<WToDPExtractConv.BV02U,
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
+ def : Pat<WToDPExtractConv.BV13U,
+ (v2f64 (XVCVUXWDP $A))>;
}
let Predicates = [HasDirectMove] in {
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 4e096b1c5c03..469cef01094b 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -6123,3 +6123,412 @@ entry:
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %splat.splat
}
+
+; Some additional patterns that come up in real code.
+define dso_local <2 x double> @sint_to_fp_widen02(<4 x i32> %a) {
+; P9BE-LABEL: sint_to_fp_widen02:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xvcvsxwdp v2, v2
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: sint_to_fp_widen02:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9LE-NEXT: xvcvsxwdp v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: sint_to_fp_widen02:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xvcvsxwdp v2, v2
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: sint_to_fp_widen02:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8LE-NEXT: xvcvsxwdp v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 0
+ %conv = sitofp i32 %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %a, i32 2
+ %conv2 = sitofp i32 %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @sint_to_fp_widen13(<4 x i32> %a) { ; big-endian needs a 1-word rotate so that words 0 and 2 (the ones xvcvsxwdp converts) hold elements 1 and 3
+; P9BE-LABEL: sint_to_fp_widen13:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9BE-NEXT: xvcvsxwdp v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: sint_to_fp_widen13:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xvcvsxwdp v2, v2
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: sint_to_fp_widen13:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8BE-NEXT: xvcvsxwdp v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: sint_to_fp_widen13:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xvcvsxwdp v2, v2
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 1
+ %conv = sitofp i32 %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %a, i32 3
+ %conv2 = sitofp i32 %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @uint_to_fp_widen02(<4 x i32> %a) {
+; P9BE-LABEL: uint_to_fp_widen02:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xvcvuxwdp v2, v2
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: uint_to_fp_widen02:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9LE-NEXT: xvcvuxwdp v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: uint_to_fp_widen02:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xvcvuxwdp v2, v2
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: uint_to_fp_widen02:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8LE-NEXT: xvcvuxwdp v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 0
+ %conv = uitofp i32 %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %a, i32 2
+ %conv2 = uitofp i32 %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @uint_to_fp_widen13(<4 x i32> %a) { ; big-endian needs a 1-word rotate so that words 0 and 2 (the ones xvcvuxwdp converts) hold elements 1 and 3
+; P9BE-LABEL: uint_to_fp_widen13:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9BE-NEXT: xvcvuxwdp v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: uint_to_fp_widen13:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xvcvuxwdp v2, v2
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: uint_to_fp_widen13:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8BE-NEXT: xvcvuxwdp v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: uint_to_fp_widen13:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xvcvuxwdp v2, v2
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 1
+ %conv = uitofp i32 %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %a, i32 3
+ %conv2 = uitofp i32 %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @fp_extend01(<4 x float> %a) {
+; P9BE-LABEL: fp_extend01:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxmrghw vs0, v2, v2
+; P9BE-NEXT: xvcvspdp v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: fp_extend01:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxmrglw vs0, v2, v2
+; P9LE-NEXT: xvcvspdp v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: fp_extend01:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxmrghw vs0, v2, v2
+; P8BE-NEXT: xvcvspdp v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: fp_extend01:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxmrglw vs0, v2, v2
+; P8LE-NEXT: xvcvspdp v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 0
+ %conv = fpext float %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 1
+ %conv2 = fpext float %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @fp_extend10(<4 x float> %a) {
+; P9BE-LABEL: fp_extend10:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxmrghw vs0, v2, v2
+; P9BE-NEXT: xvcvspdp vs0, vs0
+; P9BE-NEXT: xxswapd v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: fp_extend10:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxmrglw vs0, v2, v2
+; P9LE-NEXT: xvcvspdp vs0, vs0
+; P9LE-NEXT: xxswapd v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: fp_extend10:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxmrghw vs0, v2, v2
+; P8BE-NEXT: xvcvspdp vs0, vs0
+; P8BE-NEXT: xxswapd v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: fp_extend10:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxmrglw vs0, v2, v2
+; P8LE-NEXT: xvcvspdp vs0, vs0
+; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 1
+ %conv = fpext float %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 0
+ %conv2 = fpext float %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @fp_extend02(<4 x float> %a) {
+; P9BE-LABEL: fp_extend02:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xvcvspdp v2, v2
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: fp_extend02:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9LE-NEXT: xvcvspdp v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: fp_extend02:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xvcvspdp v2, v2
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: fp_extend02:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8LE-NEXT: xvcvspdp v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 0
+ %conv = fpext float %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 2
+ %conv2 = fpext float %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @fp_extend13(<4 x float> %a) { ; big-endian needs a 1-word rotate so that words 0 and 2 (the ones xvcvspdp converts) hold elements 1 and 3
+; P9BE-LABEL: fp_extend13:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9BE-NEXT: xvcvspdp v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: fp_extend13:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xvcvspdp v2, v2
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: fp_extend13:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8BE-NEXT: xvcvspdp v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: fp_extend13:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xvcvspdp v2, v2
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 1
+ %conv = fpext float %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 3
+ %conv2 = fpext float %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @fp_extend23(<4 x float> %a) {
+; P9BE-LABEL: fp_extend23:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxmrglw vs0, v2, v2
+; P9BE-NEXT: xvcvspdp v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: fp_extend23:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxmrghw vs0, v2, v2
+; P9LE-NEXT: xvcvspdp v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: fp_extend23:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxmrglw vs0, v2, v2
+; P8BE-NEXT: xvcvspdp v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: fp_extend23:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxmrghw vs0, v2, v2
+; P8LE-NEXT: xvcvspdp v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 2
+ %conv = fpext float %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 3
+ %conv2 = fpext float %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @fp_extend32(<4 x float> %a) {
+; P9BE-LABEL: fp_extend32:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxmrglw vs0, v2, v2
+; P9BE-NEXT: xvcvspdp vs0, vs0
+; P9BE-NEXT: xxswapd v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: fp_extend32:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxmrghw vs0, v2, v2
+; P9LE-NEXT: xvcvspdp vs0, vs0
+; P9LE-NEXT: xxswapd v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: fp_extend32:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxmrglw vs0, v2, v2
+; P8BE-NEXT: xvcvspdp vs0, vs0
+; P8BE-NEXT: xxswapd v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: fp_extend32:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxmrghw vs0, v2, v2
+; P8LE-NEXT: xvcvspdp vs0, vs0
+; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 3
+ %conv = fpext float %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 2
+ %conv2 = fpext float %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @fp_extend_two00(<4 x float> %a, <4 x float> %b) {
+; P9BE-LABEL: fp_extend_two00:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxmrghd vs0, v2, v3
+; P9BE-NEXT: xvcvspdp v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: fp_extend_two00:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxmrgld vs0, v3, v2
+; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; P9LE-NEXT: xvcvspdp v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: fp_extend_two00:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxmrghd vs0, v2, v3
+; P8BE-NEXT: xvcvspdp v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: fp_extend_two00:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxmrgld vs0, v3, v2
+; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; P8LE-NEXT: xvcvspdp v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 0
+ %conv = fpext float %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x float> %b, i32 0
+ %conv2 = fpext float %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @fp_extend_two33(<4 x float> %a, <4 x float> %b) {
+; P9BE-LABEL: fp_extend_two33:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxmrgld vs0, v2, v3
+; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; P9BE-NEXT: xvcvspdp v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: fp_extend_two33:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxmrghd vs0, v3, v2
+; P9LE-NEXT: xvcvspdp v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: fp_extend_two33:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxmrgld vs0, v2, v3
+; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; P8BE-NEXT: xvcvspdp v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: fp_extend_two33:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxmrghd vs0, v3, v2
+; P8LE-NEXT: xvcvspdp v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 3
+ %conv = fpext float %vecext to double
+ %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+ %vecext1 = extractelement <4 x float> %b, i32 3
+ %conv2 = fpext float %vecext1 to double
+ %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+ ret <2 x double> %vecinit3
+}
diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll
index f7727d6f4ea1..1dc40edf7146 100644
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll
@@ -47,33 +47,23 @@ define dso_local void @test2(<16 x float>* nocapture readonly %a, <2 x double>*
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs0, 0(r3)
-; CHECK-NEXT: xxsldwi vs1, vs0, vs0, 1
-; CHECK-NEXT: xscvspdpn f2, vs0
-; CHECK-NEXT: xxsldwi vs3, vs0, vs0, 3
-; CHECK-NEXT: xxswapd vs0, vs0
-; CHECK-NEXT: xscvspdpn f1, vs1
-; CHECK-NEXT: xscvspdpn f3, vs3
-; CHECK-NEXT: xscvspdpn f0, vs0
-; CHECK-NEXT: xxmrghd vs0, vs0, vs3
-; CHECK-NEXT: xxmrghd vs1, vs2, vs1
-; CHECK-NEXT: stxv vs0, 0(r4)
-; CHECK-NEXT: stxv vs1, 0(r5)
+; CHECK-NEXT: xxmrglw vs1, vs0, vs0
+; CHECK-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-NEXT: xvcvspdp vs1, vs1
+; CHECK-NEXT: xvcvspdp vs0, vs0
+; CHECK-NEXT: stxv vs1, 0(r4)
+; CHECK-NEXT: stxv vs0, 0(r5)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 0(r3)
-; CHECK-BE-NEXT: xxswapd vs1, vs0
-; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3
-; CHECK-BE-NEXT: xscvspdpn f3, vs0
-; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xxmrghd vs0, vs3, vs0
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
-; CHECK-BE-NEXT: stxv vs0, 0(r4)
-; CHECK-BE-NEXT: stxv vs1, 0(r5)
+; CHECK-BE-NEXT: xxmrghw vs1, vs0, vs0
+; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs1, vs1
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
+; CHECK-BE-NEXT: stxv vs1, 0(r4)
+; CHECK-BE-NEXT: stxv vs0, 0(r5)
; CHECK-BE-NEXT: blr
entry:
%0 = load <16 x float>, <16 x float>* %a, align 16
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
index cf4a6d636207..d355dcd08b0f 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
@@ -14,10 +14,8 @@ define <2 x i64> @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
; CHECK-P8-NEXT: blr
;
@@ -25,20 +23,16 @@ define <2 x i64> @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P9-NEXT: xvcvspdp vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
-; CHECK-BE-NEXT: xscvspdpn f1, vs0
-; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds v2, vs0
; CHECK-BE-NEXT: blr
entry:
@@ -50,16 +44,11 @@ entry:
define void @test4elt(<4 x i64>* noalias nocapture sret %agg.result, <4 x float> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs1, v2, v2
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
+; CHECK-P8-NEXT: xvcvspdp vs1, vs1
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
; CHECK-P8-NEXT: xvcvdpuxds v3, vs1
; CHECK-P8-NEXT: xxswapd vs1, v2
@@ -70,36 +59,26 @@ define void @test4elt(<4 x i64>* noalias nocapture sret %agg.result, <4 x float>
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT: xxswapd vs1, v2
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, v2
-; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs2
+; CHECK-P9-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P9-NEXT: xxmrghw vs1, v2, v2
+; CHECK-P9-NEXT: xvcvspdp vs0, vs0
+; CHECK-P9-NEXT: xvcvspdp vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 1
-; CHECK-BE-NEXT: xscvspdpn f0, v2
-; CHECK-BE-NEXT: xxswapd vs2, v2
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
-; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: xxmrghw vs0, v2, v2
+; CHECK-BE-NEXT: xxmrglw vs1, v2, v2
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = fptoui <4 x float> %a to <4 x i64>
@@ -115,31 +94,21 @@ define void @test8elt(<8 x i64>* noalias nocapture sret %agg.result, <8 x float>
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT: xxswapd vs6, v3
-; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT: xxswapd vs1, v2
-; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1
-; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xscvspdpn f4, v3
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xscvspdpn f6, vs6
-; CHECK-P8-NEXT: xscvspdpn f7, vs7
-; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
-; CHECK-P8-NEXT: xxmrghd vs2, vs6, vs5
+; CHECK-P8-NEXT: xxmrglw vs2, v3, v3
+; CHECK-P8-NEXT: xxmrghw vs3, v3, v3
+; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs1, v2, v2
+; CHECK-P8-NEXT: xvcvspdp vs2, vs2
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
+; CHECK-P8-NEXT: xvcvspdp vs1, vs1
+; CHECK-P8-NEXT: xvcvspdp vs3, vs3
+; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs7
; CHECK-P8-NEXT: xvcvdpuxds v3, vs1
-; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xvcvdpuxds v5, vs3
+; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, v3
-; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs2, v5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
@@ -149,65 +118,45 @@ define void @test8elt(<8 x i64>* noalias nocapture sret %agg.result, <8 x float>
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 0(r4)
-; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs2, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f3, vs0
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1
-; CHECK-P9-NEXT: lxv vs2, 16(r4)
-; CHECK-P9-NEXT: xxmrghd vs0, vs3, vs0
+; CHECK-P9-NEXT: lxv vs0, 16(r4)
+; CHECK-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-P9-NEXT: xxmrglw vs2, vs1, vs1
+; CHECK-P9-NEXT: xxmrghw vs1, vs1, vs1
+; CHECK-P9-NEXT: xxmrglw vs3, vs0, vs0
+; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-P9-NEXT: xvcvspdp vs2, vs2
+; CHECK-P9-NEXT: xvcvspdp vs1, vs1
+; CHECK-P9-NEXT: xvcvspdp vs3, vs3
+; CHECK-P9-NEXT: xvcvspdp vs0, vs0
+; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3
-; CHECK-P9-NEXT: xxswapd vs4, vs2
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: stxv vs0, 16(r3)
-; CHECK-P9-NEXT: xxmrghd vs3, vs4, vs3
-; CHECK-P9-NEXT: xscvspdpn f4, vs2
-; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
-; CHECK-P9-NEXT: xxmrghd vs2, vs4, vs2
-; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
+; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs3, 32(r3)
-; CHECK-P9-NEXT: stxv vs2, 48(r3)
-; CHECK-P9-NEXT: stxv vs1, 0(r3)
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: stxv vs2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs1, 0(r4)
-; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 1
-; CHECK-BE-NEXT: xscvspdpn f2, vs1
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: lxv vs0, 16(r4)
-; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3
-; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs1, vs1
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs3
-; CHECK-BE-NEXT: xscvspdpn f3, vs0
-; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs4
-; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs0, vs0
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs4
+; CHECK-BE-NEXT: lxv vs1, 0(r4)
+; CHECK-BE-NEXT: xxmrghw vs2, vs1, vs1
+; CHECK-BE-NEXT: xxmrglw vs1, vs1, vs1
+; CHECK-BE-NEXT: xxmrghw vs3, vs0, vs0
+; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs2, vs2
+; CHECK-BE-NEXT: xvcvspdp vs1, vs1
+; CHECK-BE-NEXT: xvcvspdp vs3, vs3
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-BE-NEXT: stxv vs3, 32(r3)
; CHECK-BE-NEXT: stxv vs0, 48(r3)
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
@@ -220,70 +169,50 @@ entry:
define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r7, 48
+; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: li r8, 64
-; CHECK-P8-NEXT: lvx v5, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r7
-; CHECK-P8-NEXT: lvx v2, r4, r6
+; CHECK-P8-NEXT: lvx v4, r4, r7
+; CHECK-P8-NEXT: lvx v2, r4, r5
+; CHECK-P8-NEXT: lvx v3, r4, r6
+; CHECK-P8-NEXT: xxmrghw vs3, v4, v4
+; CHECK-P8-NEXT: xxmrglw vs5, v4, v4
+; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs1, v2, v2
+; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: li r4, 112
-; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3
-; CHECK-P8-NEXT: xscvspdpn f6, v4
-; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3
-; CHECK-P8-NEXT: xxswapd vs3, v5
-; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1
-; CHECK-P8-NEXT: xscvspdpn f4, v3
-; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1
-; CHECK-P8-NEXT: xxsldwi vs10, v3, v3, 3
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xxswapd vs11, v3
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xxsldwi vs7, v2, v2, 3
-; CHECK-P8-NEXT: xscvspdpn f9, vs9
-; CHECK-P8-NEXT: xxswapd vs8, v2
-; CHECK-P8-NEXT: xscvspdpn f0, v5
-; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xxswapd v2, v4
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1
-; CHECK-P8-NEXT: xscvspdpn f10, vs10
-; CHECK-P8-NEXT: xscvspdpn f11, vs11
-; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs1
-; CHECK-P8-NEXT: xscvspdpn f7, vs7
-; CHECK-P8-NEXT: xxmrghd vs4, vs4, vs9
-; CHECK-P8-NEXT: xscvspdpn f8, vs8
-; CHECK-P8-NEXT: xscvspdpn f12, vs12
-; CHECK-P8-NEXT: xscvspdpn f13, vs13
-; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs5
-; CHECK-P8-NEXT: xscvspdpn f3, v2
-; CHECK-P8-NEXT: xscvspdpn f9, v3
-; CHECK-P8-NEXT: xxmrghd vs5, vs11, vs10
-; CHECK-P8-NEXT: xvcvdpuxds v3, vs4
-; CHECK-P8-NEXT: xvcvdpuxds v2, vs1
-; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs12
-; CHECK-P8-NEXT: xxmrghd vs2, vs8, vs7
-; CHECK-P8-NEXT: xvcvdpuxds v4, vs0
-; CHECK-P8-NEXT: xxmrghd vs0, vs3, vs13
+; CHECK-P8-NEXT: xxmrglw vs2, v3, v3
+; CHECK-P8-NEXT: xxmrghw vs4, v3, v3
+; CHECK-P8-NEXT: xvcvspdp vs3, vs3
+; CHECK-P8-NEXT: xxmrglw vs6, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs7, v2, v2
+; CHECK-P8-NEXT: xvcvspdp vs5, vs5
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
+; CHECK-P8-NEXT: xvcvspdp vs1, vs1
+; CHECK-P8-NEXT: xvcvspdp vs2, vs2
+; CHECK-P8-NEXT: xvcvspdp vs4, vs4
+; CHECK-P8-NEXT: xvcvspdp vs6, vs6
+; CHECK-P8-NEXT: xvcvspdp vs7, vs7
+; CHECK-P8-NEXT: xvcvdpuxds v3, vs3
; CHECK-P8-NEXT: xvcvdpuxds v5, vs5
-; CHECK-P8-NEXT: xxmrghd vs3, vs6, vs9
-; CHECK-P8-NEXT: xvcvdpuxds v0, vs1
+; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
+; CHECK-P8-NEXT: xvcvdpuxds v4, vs1
+; CHECK-P8-NEXT: xvcvdpuxds v0, vs4
; CHECK-P8-NEXT: xvcvdpuxds v1, vs2
-; CHECK-P8-NEXT: xvcvdpuxds v6, vs0
+; CHECK-P8-NEXT: xvcvdpuxds v6, vs6
; CHECK-P8-NEXT: xxswapd vs0, v3
-; CHECK-P8-NEXT: xvcvdpuxds v7, vs3
-; CHECK-P8-NEXT: xxswapd vs4, v2
-; CHECK-P8-NEXT: xxswapd vs3, v4
+; CHECK-P8-NEXT: xvcvdpuxds v7, vs7
; CHECK-P8-NEXT: xxswapd vs1, v5
+; CHECK-P8-NEXT: xxswapd vs4, v2
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 96
+; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs2, v0
-; CHECK-P8-NEXT: xxswapd vs0, v1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: xxswapd vs5, v6
; CHECK-P8-NEXT: li r4, 80
+; CHECK-P8-NEXT: xxswapd vs0, v1
+; CHECK-P8-NEXT: xxswapd vs5, v6
; CHECK-P8-NEXT: xxswapd vs1, v7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r8
@@ -295,122 +224,82 @@ define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x flo
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs4, 16(r4)
-; CHECK-P9-NEXT: xxsldwi vs5, vs4, vs4, 3
-; CHECK-P9-NEXT: xxswapd vs6, vs4
-; CHECK-P9-NEXT: lxv vs0, 0(r4)
-; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs2, vs0
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xxmrghd vs5, vs6, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs4
-; CHECK-P9-NEXT: xxsldwi vs4, vs4, vs4, 1
-; CHECK-P9-NEXT: lxv vs3, 32(r4)
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xxswapd vs7, vs3
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1
-; CHECK-P9-NEXT: xscvspdpn f2, vs0
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xxmrghd vs0, vs2, vs0
-; CHECK-P9-NEXT: xxmrghd vs4, vs6, vs4
-; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3
+; CHECK-P9-NEXT: lxv vs0, 48(r4)
+; CHECK-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-P9-NEXT: lxv vs3, 16(r4)
+; CHECK-P9-NEXT: lxv vs5, 32(r4)
+; CHECK-P9-NEXT: xxmrglw vs2, vs1, vs1
+; CHECK-P9-NEXT: xxmrghw vs1, vs1, vs1
+; CHECK-P9-NEXT: xxmrglw vs4, vs3, vs3
+; CHECK-P9-NEXT: xxmrghw vs3, vs3, vs3
+; CHECK-P9-NEXT: xxmrglw vs6, vs5, vs5
+; CHECK-P9-NEXT: xxmrghw vs5, vs5, vs5
+; CHECK-P9-NEXT: xxmrglw vs7, vs0, vs0
+; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-P9-NEXT: xvcvspdp vs2, vs2
+; CHECK-P9-NEXT: xvcvspdp vs1, vs1
+; CHECK-P9-NEXT: xvcvspdp vs4, vs4
+; CHECK-P9-NEXT: xvcvspdp vs3, vs3
+; CHECK-P9-NEXT: xvcvspdp vs6, vs6
+; CHECK-P9-NEXT: xvcvspdp vs5, vs5
+; CHECK-P9-NEXT: xvcvspdp vs7, vs7
+; CHECK-P9-NEXT: xvcvspdp vs0, vs0
+; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs3
-; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1
-; CHECK-P9-NEXT: lxv vs2, 48(r4)
-; CHECK-P9-NEXT: xxswapd vs8, vs2
-; CHECK-P9-NEXT: xscvspdpn f8, vs8
; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs3
-; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
-; CHECK-P9-NEXT: stxv vs6, 64(r3)
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xxmrghd vs7, vs8, vs7
-; CHECK-P9-NEXT: xscvspdpn f8, vs2
-; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
+; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
-; CHECK-P9-NEXT: stxv vs3, 80(r3)
-; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
+; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT: stxv vs0, 112(r3)
; CHECK-P9-NEXT: stxv vs7, 96(r3)
-; CHECK-P9-NEXT: stxv vs2, 112(r3)
-; CHECK-P9-NEXT: stxv vs4, 48(r3)
-; CHECK-P9-NEXT: stxv vs5, 32(r3)
-; CHECK-P9-NEXT: stxv vs0, 16(r3)
-; CHECK-P9-NEXT: stxv vs1, 0(r3)
+; CHECK-P9-NEXT: stxv vs5, 80(r3)
+; CHECK-P9-NEXT: stxv vs6, 64(r3)
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
+; CHECK-P9-NEXT: stxv vs4, 32(r3)
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: stxv vs2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r4)
-; CHECK-BE-NEXT: lxv vs4, 16(r4)
-; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f1, vs0
-; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs0, vs0
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 1
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5
-; CHECK-BE-NEXT: xscvspdpn f5, vs4
-; CHECK-BE-NEXT: lxv vs3, 32(r4)
-; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 1
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xxmrghd vs5, vs5, vs6
-; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 3
-; CHECK-BE-NEXT: xxswapd vs4, vs4
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
-; CHECK-BE-NEXT: lxv vs2, 48(r4)
-; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 1
+; CHECK-BE-NEXT: lxv vs0, 48(r4)
+; CHECK-BE-NEXT: lxv vs1, 0(r4)
+; CHECK-BE-NEXT: lxv vs3, 16(r4)
+; CHECK-BE-NEXT: lxv vs5, 32(r4)
+; CHECK-BE-NEXT: xxmrghw vs2, vs1, vs1
+; CHECK-BE-NEXT: xxmrglw vs1, vs1, vs1
+; CHECK-BE-NEXT: xxmrghw vs4, vs3, vs3
+; CHECK-BE-NEXT: xxmrglw vs3, vs3, vs3
+; CHECK-BE-NEXT: xxmrghw vs6, vs5, vs5
+; CHECK-BE-NEXT: xxmrglw vs5, vs5, vs5
+; CHECK-BE-NEXT: xxmrghw vs7, vs0, vs0
+; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs2, vs2
+; CHECK-BE-NEXT: xvcvspdp vs1, vs1
+; CHECK-BE-NEXT: xvcvspdp vs4, vs4
+; CHECK-BE-NEXT: xvcvspdp vs3, vs3
+; CHECK-BE-NEXT: xvcvspdp vs6, vs6
+; CHECK-BE-NEXT: xvcvspdp vs5, vs5
+; CHECK-BE-NEXT: xvcvspdp vs7, vs7
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
+; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs6
-; CHECK-BE-NEXT: xscvspdpn f6, vs3
-; CHECK-BE-NEXT: stxv vs0, 16(r3)
-; CHECK-BE-NEXT: xxmrghd vs6, vs6, vs7
-; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 3
-; CHECK-BE-NEXT: xxswapd vs3, vs3
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs7
-; CHECK-BE-NEXT: xscvspdpn f7, vs2
-; CHECK-BE-NEXT: xxmrghd vs7, vs7, vs8
-; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 3
-; CHECK-BE-NEXT: xxswapd vs2, vs2
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs8
-; CHECK-BE-NEXT: stxv vs5, 32(r3)
; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
-; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
+; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
-; CHECK-BE-NEXT: stxv vs3, 80(r3)
+; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT: stxv vs0, 112(r3)
; CHECK-BE-NEXT: stxv vs7, 96(r3)
-; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
-; CHECK-BE-NEXT: stxv vs2, 112(r3)
+; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs6, 64(r3)
-; CHECK-BE-NEXT: stxv vs4, 48(r3)
-; CHECK-BE-NEXT: stxv vs1, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs4, 32(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x float>, <16 x float>* %0, align 64
@@ -424,10 +313,8 @@ define <2 x i64> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
; CHECK-P8-NEXT: blr
;
@@ -435,20 +322,16 @@ define <2 x i64> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P9-NEXT: xvcvspdp vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
-; CHECK-BE-NEXT: xscvspdpn f1, vs0
-; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds v2, vs0
; CHECK-BE-NEXT: blr
entry:
@@ -460,16 +343,11 @@ entry:
define void @test4elt_signed(<4 x i64>* noalias nocapture sret %agg.result, <4 x float> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs1, v2, v2
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
+; CHECK-P8-NEXT: xvcvspdp vs1, vs1
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
; CHECK-P8-NEXT: xvcvdpuxds v3, vs1
; CHECK-P8-NEXT: xxswapd vs1, v2
@@ -480,36 +358,26 @@ define void @test4elt_signed(<4 x i64>* noalias nocapture sret %agg.result, <4 x
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT: xxswapd vs1, v2
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, v2
-; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs2
+; CHECK-P9-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P9-NEXT: xxmrghw vs1, v2, v2
+; CHECK-P9-NEXT: xvcvspdp vs0, vs0
+; CHECK-P9-NEXT: xvcvspdp vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 1
-; CHECK-BE-NEXT: xscvspdpn f0, v2
-; CHECK-BE-NEXT: xxswapd vs2, v2
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
-; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: xxmrghw vs0, v2, v2
+; CHECK-BE-NEXT: xxmrglw vs1, v2, v2
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = fptoui <4 x float> %a to <4 x i64>
@@ -525,31 +393,21 @@ define void @test8elt_signed(<8 x i64>* noalias nocapture sret %agg.result, <8 x
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT: xxswapd vs6, v3
-; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT: xxswapd vs1, v2
-; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1
-; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xscvspdpn f4, v3
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xscvspdpn f6, vs6
-; CHECK-P8-NEXT: xscvspdpn f7, vs7
-; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
-; CHECK-P8-NEXT: xxmrghd vs2, vs6, vs5
+; CHECK-P8-NEXT: xxmrglw vs2, v3, v3
+; CHECK-P8-NEXT: xxmrghw vs3, v3, v3
+; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs1, v2, v2
+; CHECK-P8-NEXT: xvcvspdp vs2, vs2
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
+; CHECK-P8-NEXT: xvcvspdp vs1, vs1
+; CHECK-P8-NEXT: xvcvspdp vs3, vs3
+; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs7
; CHECK-P8-NEXT: xvcvdpuxds v3, vs1
-; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xvcvdpuxds v5, vs3
+; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, v3
-; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs2, v5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
@@ -559,65 +417,45 @@ define void @test8elt_signed(<8 x i64>* noalias nocapture sret %agg.result, <8 x
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 0(r4)
-; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs2, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f3, vs0
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1
-; CHECK-P9-NEXT: lxv vs2, 16(r4)
-; CHECK-P9-NEXT: xxmrghd vs0, vs3, vs0
+; CHECK-P9-NEXT: lxv vs0, 16(r4)
+; CHECK-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-P9-NEXT: xxmrglw vs2, vs1, vs1
+; CHECK-P9-NEXT: xxmrghw vs1, vs1, vs1
+; CHECK-P9-NEXT: xxmrglw vs3, vs0, vs0
+; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-P9-NEXT: xvcvspdp vs2, vs2
+; CHECK-P9-NEXT: xvcvspdp vs1, vs1
+; CHECK-P9-NEXT: xvcvspdp vs3, vs3
+; CHECK-P9-NEXT: xvcvspdp vs0, vs0
+; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3
-; CHECK-P9-NEXT: xxswapd vs4, vs2
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: stxv vs0, 16(r3)
-; CHECK-P9-NEXT: xxmrghd vs3, vs4, vs3
-; CHECK-P9-NEXT: xscvspdpn f4, vs2
-; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
-; CHECK-P9-NEXT: xxmrghd vs2, vs4, vs2
-; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
+; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs3, 32(r3)
-; CHECK-P9-NEXT: stxv vs2, 48(r3)
-; CHECK-P9-NEXT: stxv vs1, 0(r3)
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: stxv vs2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs1, 0(r4)
-; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 1
-; CHECK-BE-NEXT: xscvspdpn f2, vs1
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: lxv vs0, 16(r4)
-; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3
-; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs1, vs1
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs3
-; CHECK-BE-NEXT: xscvspdpn f3, vs0
-; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs4
-; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs0, vs0
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs4
+; CHECK-BE-NEXT: lxv vs1, 0(r4)
+; CHECK-BE-NEXT: xxmrghw vs2, vs1, vs1
+; CHECK-BE-NEXT: xxmrglw vs1, vs1, vs1
+; CHECK-BE-NEXT: xxmrghw vs3, vs0, vs0
+; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs2, vs2
+; CHECK-BE-NEXT: xvcvspdp vs1, vs1
+; CHECK-BE-NEXT: xvcvspdp vs3, vs3
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-BE-NEXT: stxv vs3, 32(r3)
; CHECK-BE-NEXT: stxv vs0, 48(r3)
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
@@ -630,70 +468,50 @@ entry:
define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r7, 48
+; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: li r8, 64
-; CHECK-P8-NEXT: lvx v5, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r7
-; CHECK-P8-NEXT: lvx v2, r4, r6
+; CHECK-P8-NEXT: lvx v4, r4, r7
+; CHECK-P8-NEXT: lvx v2, r4, r5
+; CHECK-P8-NEXT: lvx v3, r4, r6
+; CHECK-P8-NEXT: xxmrghw vs3, v4, v4
+; CHECK-P8-NEXT: xxmrglw vs5, v4, v4
+; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs1, v2, v2
+; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: li r4, 112
-; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3
-; CHECK-P8-NEXT: xscvspdpn f6, v4
-; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3
-; CHECK-P8-NEXT: xxswapd vs3, v5
-; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1
-; CHECK-P8-NEXT: xscvspdpn f4, v3
-; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1
-; CHECK-P8-NEXT: xxsldwi vs10, v3, v3, 3
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xxswapd vs11, v3
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xxsldwi vs7, v2, v2, 3
-; CHECK-P8-NEXT: xscvspdpn f9, vs9
-; CHECK-P8-NEXT: xxswapd vs8, v2
-; CHECK-P8-NEXT: xscvspdpn f0, v5
-; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xxswapd v2, v4
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1
-; CHECK-P8-NEXT: xscvspdpn f10, vs10
-; CHECK-P8-NEXT: xscvspdpn f11, vs11
-; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs1
-; CHECK-P8-NEXT: xscvspdpn f7, vs7
-; CHECK-P8-NEXT: xxmrghd vs4, vs4, vs9
-; CHECK-P8-NEXT: xscvspdpn f8, vs8
-; CHECK-P8-NEXT: xscvspdpn f12, vs12
-; CHECK-P8-NEXT: xscvspdpn f13, vs13
-; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs5
-; CHECK-P8-NEXT: xscvspdpn f3, v2
-; CHECK-P8-NEXT: xscvspdpn f9, v3
-; CHECK-P8-NEXT: xxmrghd vs5, vs11, vs10
-; CHECK-P8-NEXT: xvcvdpuxds v3, vs4
-; CHECK-P8-NEXT: xvcvdpuxds v2, vs1
-; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs12
-; CHECK-P8-NEXT: xxmrghd vs2, vs8, vs7
-; CHECK-P8-NEXT: xvcvdpuxds v4, vs0
-; CHECK-P8-NEXT: xxmrghd vs0, vs3, vs13
+; CHECK-P8-NEXT: xxmrglw vs2, v3, v3
+; CHECK-P8-NEXT: xxmrghw vs4, v3, v3
+; CHECK-P8-NEXT: xvcvspdp vs3, vs3
+; CHECK-P8-NEXT: xxmrglw vs6, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs7, v2, v2
+; CHECK-P8-NEXT: xvcvspdp vs5, vs5
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
+; CHECK-P8-NEXT: xvcvspdp vs1, vs1
+; CHECK-P8-NEXT: xvcvspdp vs2, vs2
+; CHECK-P8-NEXT: xvcvspdp vs4, vs4
+; CHECK-P8-NEXT: xvcvspdp vs6, vs6
+; CHECK-P8-NEXT: xvcvspdp vs7, vs7
+; CHECK-P8-NEXT: xvcvdpuxds v3, vs3
; CHECK-P8-NEXT: xvcvdpuxds v5, vs5
-; CHECK-P8-NEXT: xxmrghd vs3, vs6, vs9
-; CHECK-P8-NEXT: xvcvdpuxds v0, vs1
+; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
+; CHECK-P8-NEXT: xvcvdpuxds v4, vs1
+; CHECK-P8-NEXT: xvcvdpuxds v0, vs4
; CHECK-P8-NEXT: xvcvdpuxds v1, vs2
-; CHECK-P8-NEXT: xvcvdpuxds v6, vs0
+; CHECK-P8-NEXT: xvcvdpuxds v6, vs6
; CHECK-P8-NEXT: xxswapd vs0, v3
-; CHECK-P8-NEXT: xvcvdpuxds v7, vs3
-; CHECK-P8-NEXT: xxswapd vs4, v2
-; CHECK-P8-NEXT: xxswapd vs3, v4
+; CHECK-P8-NEXT: xvcvdpuxds v7, vs7
; CHECK-P8-NEXT: xxswapd vs1, v5
+; CHECK-P8-NEXT: xxswapd vs4, v2
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 96
+; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs2, v0
-; CHECK-P8-NEXT: xxswapd vs0, v1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: xxswapd vs5, v6
; CHECK-P8-NEXT: li r4, 80
+; CHECK-P8-NEXT: xxswapd vs0, v1
+; CHECK-P8-NEXT: xxswapd vs5, v6
; CHECK-P8-NEXT: xxswapd vs1, v7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r8
@@ -705,122 +523,82 @@ define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <1
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs4, 16(r4)
-; CHECK-P9-NEXT: xxsldwi vs5, vs4, vs4, 3
-; CHECK-P9-NEXT: xxswapd vs6, vs4
-; CHECK-P9-NEXT: lxv vs0, 0(r4)
-; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs2, vs0
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xxmrghd vs5, vs6, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs4
-; CHECK-P9-NEXT: xxsldwi vs4, vs4, vs4, 1
-; CHECK-P9-NEXT: lxv vs3, 32(r4)
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xxswapd vs7, vs3
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1
-; CHECK-P9-NEXT: xscvspdpn f2, vs0
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xxmrghd vs0, vs2, vs0
-; CHECK-P9-NEXT: xxmrghd vs4, vs6, vs4
-; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3
+; CHECK-P9-NEXT: lxv vs0, 48(r4)
+; CHECK-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-P9-NEXT: lxv vs3, 16(r4)
+; CHECK-P9-NEXT: lxv vs5, 32(r4)
+; CHECK-P9-NEXT: xxmrglw vs2, vs1, vs1
+; CHECK-P9-NEXT: xxmrghw vs1, vs1, vs1
+; CHECK-P9-NEXT: xxmrglw vs4, vs3, vs3
+; CHECK-P9-NEXT: xxmrghw vs3, vs3, vs3
+; CHECK-P9-NEXT: xxmrglw vs6, vs5, vs5
+; CHECK-P9-NEXT: xxmrghw vs5, vs5, vs5
+; CHECK-P9-NEXT: xxmrglw vs7, vs0, vs0
+; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-P9-NEXT: xvcvspdp vs2, vs2
+; CHECK-P9-NEXT: xvcvspdp vs1, vs1
+; CHECK-P9-NEXT: xvcvspdp vs4, vs4
+; CHECK-P9-NEXT: xvcvspdp vs3, vs3
+; CHECK-P9-NEXT: xvcvspdp vs6, vs6
+; CHECK-P9-NEXT: xvcvspdp vs5, vs5
+; CHECK-P9-NEXT: xvcvspdp vs7, vs7
+; CHECK-P9-NEXT: xvcvspdp vs0, vs0
+; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs3
-; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1
-; CHECK-P9-NEXT: lxv vs2, 48(r4)
-; CHECK-P9-NEXT: xxswapd vs8, vs2
-; CHECK-P9-NEXT: xscvspdpn f8, vs8
; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs3
-; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
-; CHECK-P9-NEXT: stxv vs6, 64(r3)
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xxmrghd vs7, vs8, vs7
-; CHECK-P9-NEXT: xscvspdpn f8, vs2
-; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
+; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
-; CHECK-P9-NEXT: stxv vs3, 80(r3)
-; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
+; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT: stxv vs0, 112(r3)
; CHECK-P9-NEXT: stxv vs7, 96(r3)
-; CHECK-P9-NEXT: stxv vs2, 112(r3)
-; CHECK-P9-NEXT: stxv vs4, 48(r3)
-; CHECK-P9-NEXT: stxv vs5, 32(r3)
-; CHECK-P9-NEXT: stxv vs0, 16(r3)
-; CHECK-P9-NEXT: stxv vs1, 0(r3)
+; CHECK-P9-NEXT: stxv vs5, 80(r3)
+; CHECK-P9-NEXT: stxv vs6, 64(r3)
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
+; CHECK-P9-NEXT: stxv vs4, 32(r3)
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: stxv vs2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r4)
-; CHECK-BE-NEXT: lxv vs4, 16(r4)
-; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f1, vs0
-; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs0, vs0
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 1
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5
-; CHECK-BE-NEXT: xscvspdpn f5, vs4
-; CHECK-BE-NEXT: lxv vs3, 32(r4)
-; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 1
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xxmrghd vs5, vs5, vs6
-; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 3
-; CHECK-BE-NEXT: xxswapd vs4, vs4
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
-; CHECK-BE-NEXT: lxv vs2, 48(r4)
-; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 1
+; CHECK-BE-NEXT: lxv vs0, 48(r4)
+; CHECK-BE-NEXT: lxv vs1, 0(r4)
+; CHECK-BE-NEXT: lxv vs3, 16(r4)
+; CHECK-BE-NEXT: lxv vs5, 32(r4)
+; CHECK-BE-NEXT: xxmrghw vs2, vs1, vs1
+; CHECK-BE-NEXT: xxmrglw vs1, vs1, vs1
+; CHECK-BE-NEXT: xxmrghw vs4, vs3, vs3
+; CHECK-BE-NEXT: xxmrglw vs3, vs3, vs3
+; CHECK-BE-NEXT: xxmrghw vs6, vs5, vs5
+; CHECK-BE-NEXT: xxmrglw vs5, vs5, vs5
+; CHECK-BE-NEXT: xxmrghw vs7, vs0, vs0
+; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0
+; CHECK-BE-NEXT: xvcvspdp vs2, vs2
+; CHECK-BE-NEXT: xvcvspdp vs1, vs1
+; CHECK-BE-NEXT: xvcvspdp vs4, vs4
+; CHECK-BE-NEXT: xvcvspdp vs3, vs3
+; CHECK-BE-NEXT: xvcvspdp vs6, vs6
+; CHECK-BE-NEXT: xvcvspdp vs5, vs5
+; CHECK-BE-NEXT: xvcvspdp vs7, vs7
+; CHECK-BE-NEXT: xvcvspdp vs0, vs0
+; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs6
-; CHECK-BE-NEXT: xscvspdpn f6, vs3
-; CHECK-BE-NEXT: stxv vs0, 16(r3)
-; CHECK-BE-NEXT: xxmrghd vs6, vs6, vs7
-; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 3
-; CHECK-BE-NEXT: xxswapd vs3, vs3
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs7
-; CHECK-BE-NEXT: xscvspdpn f7, vs2
-; CHECK-BE-NEXT: xxmrghd vs7, vs7, vs8
-; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 3
-; CHECK-BE-NEXT: xxswapd vs2, vs2
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs8
-; CHECK-BE-NEXT: stxv vs5, 32(r3)
; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
-; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
+; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
-; CHECK-BE-NEXT: stxv vs3, 80(r3)
+; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT: stxv vs0, 112(r3)
; CHECK-BE-NEXT: stxv vs7, 96(r3)
-; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
-; CHECK-BE-NEXT: stxv vs2, 112(r3)
+; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs6, 64(r3)
-; CHECK-BE-NEXT: stxv vs4, 48(r3)
-; CHECK-BE-NEXT: stxv vs1, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs4, 32(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x float>, <16 x float>* %0, align 64
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 9923cb02cc8d..7cf43a92a5dc 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1554,11 +1554,8 @@ define <2 x i64> @test46(<2 x float> %a) {
;
; CHECK-LE-LABEL: test46:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-LE-NEXT: xxswapd vs1, v2
-; CHECK-LE-NEXT: xscvspdpn f0, vs0
-; CHECK-LE-NEXT: xscvspdpn f1, vs1
-; CHECK-LE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-LE-NEXT: xxmrglw vs0, v2, v2
+; CHECK-LE-NEXT: xvcvspdp vs0, vs0
; CHECK-LE-NEXT: xvcvdpuxds v2, vs0
; CHECK-LE-NEXT: blr
%v = fptoui <2 x float> %a to <2 x i64>
@@ -1625,11 +1622,8 @@ define <2 x i64> @test47(<2 x float> %a) {
;
; CHECK-LE-LABEL: test47:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-LE-NEXT: xxswapd vs1, v2
-; CHECK-LE-NEXT: xscvspdpn f0, vs0
-; CHECK-LE-NEXT: xscvspdpn f1, vs1
-; CHECK-LE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-LE-NEXT: xxmrglw vs0, v2, v2
+; CHECK-LE-NEXT: xvcvspdp vs0, vs0
; CHECK-LE-NEXT: xvcvdpsxds v2, vs0
; CHECK-LE-NEXT: blr
%v = fptosi <2 x float> %a to <2 x i64>
More information about the llvm-commits
mailing list