[llvm] 4195ed9 - [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws
Albion Fung via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 30 12:31:11 PDT 2021
Author: Albion Fung
Date: 2021-09-30T14:31:00-05:00
New Revision: 4195ed995993410b4a8ba2835fee71d4fe01c410
URL: https://github.com/llvm/llvm-project/commit/4195ed995993410b4a8ba2835fee71d4fe01c410
DIFF: https://github.com/llvm/llvm-project/commit/4195ed995993410b4a8ba2835fee71d4fe01c410.diff
LOG: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws
This patch removes the unnecessary mf/mtvsr generated in conjunction
with xscvdpsxws/xscvdpuxws.
Differential revision: https://reviews.llvm.org/D109902
Added:
Modified:
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/test-vector-insert.ll
llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0855b0186dab8..b00e58010b06b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2809,6 +2809,10 @@ def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
(v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64),
(SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>;
+def : Pat<(v4i32 (PPCSToV DblToInt.A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS f64:$A), sub_64))>;
+def : Pat<(v4i32 (PPCSToV DblToUInt.A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPUXWS f64:$A), sub_64))>;
defm : ScalToVecWPermute<
v4i32, FltToIntLoad.A,
(XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
@@ -4138,12 +4142,52 @@ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 0))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 4))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 8))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 12))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
@@ -4382,12 +4426,52 @@ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 12))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 8))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 4))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 0))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index edf49ccb53536..4c4d7be624cb8 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -17,8 +17,8 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-BE-P9
-; xscvdpsxws and uxws is only available on Power7 and above
-; Codgen is different for LE Power7 and Power8
+; xscvdpsxws and xscvdpsxws is only available on Power7 and above
+; Codgen is different for Power7, Power8, and Power9.
define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
; CHECK-LE-P7-LABEL: test:
@@ -38,20 +38,16 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
;
; CHECK-LE-P8-LABEL: test:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: xscvdpsxws f0, f1
+; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT: lvx v3, 0, r3
-; CHECK-LE-P8-NEXT: mffprwz r4, f0
-; CHECK-LE-P8-NEXT: mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: lvx v4, 0, r3
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: xscvdpsxws f0, f1
-; CHECK-LE-P9-NEXT: mffprwz r3, f0
-; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT: blr
;
@@ -70,9 +66,7 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
;
; CHECK-BE-P8-LABEL: test:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: xscvdpsxws f0, f1
-; CHECK-BE-P8-NEXT: mffprwz r3, f0
-; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: xscvdpsxws v3, f1
; CHECK-BE-P8-NEXT: vmrghw v3, v2, v3
; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3
; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1
@@ -81,8 +75,6 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
; CHECK-BE-P9-LABEL: test:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: xscvdpsxws f0, f1
-; CHECK-BE-P9-NEXT: mffprwz r3, f0
-; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT: blr
entry:
@@ -109,20 +101,16 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
;
; CHECK-LE-P8-LABEL: test2:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: xscvdpsxws f0, f1
+; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-LE-P8-NEXT: lvx v3, 0, r3
-; CHECK-LE-P8-NEXT: mffprwz r4, f0
-; CHECK-LE-P8-NEXT: mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: lvx v4, 0, r3
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test2:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: xscvdpsxws f0, f1
-; CHECK-LE-P9-NEXT: mffprwz r3, f0
-; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT: blr
;
@@ -141,9 +129,7 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
;
; CHECK-BE-P8-LABEL: test2:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: xscvdpsxws f0, f1
-; CHECK-BE-P8-NEXT: mffprwz r3, f0
-; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: xscvdpsxws v3, f1
; CHECK-BE-P8-NEXT: vmrghw v3, v2, v3
; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3
; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1
@@ -152,8 +138,6 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
; CHECK-BE-P9-LABEL: test2:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: xscvdpsxws f0, f1
-; CHECK-BE-P9-NEXT: mffprwz r3, f0
-; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT: blr
entry:
@@ -180,20 +164,16 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
;
; CHECK-LE-P8-LABEL: test3:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: xscvdpuxws f0, f1
+; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT: lvx v3, 0, r3
-; CHECK-LE-P8-NEXT: mffprwz r4, f0
-; CHECK-LE-P8-NEXT: mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: lvx v4, 0, r3
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test3:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: xscvdpuxws f0, f1
-; CHECK-LE-P9-NEXT: mffprwz r3, f0
-; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT: blr
;
@@ -212,9 +192,7 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
;
; CHECK-BE-P8-LABEL: test3:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: xscvdpuxws f0, f1
-; CHECK-BE-P8-NEXT: mffprwz r3, f0
-; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: xscvdpuxws v3, f1
; CHECK-BE-P8-NEXT: vmrghw v3, v2, v3
; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3
; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1
@@ -223,8 +201,6 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
; CHECK-BE-P9-LABEL: test3:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: xscvdpuxws f0, f1
-; CHECK-BE-P9-NEXT: mffprwz r3, f0
-; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT: blr
entry:
@@ -251,20 +227,16 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
;
; CHECK-LE-P8-LABEL: test4:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: xscvdpuxws f0, f1
+; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P8-NEXT: lvx v3, 0, r3
-; CHECK-LE-P8-NEXT: mffprwz r4, f0
-; CHECK-LE-P8-NEXT: mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: lvx v4, 0, r3
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test4:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: xscvdpuxws f0, f1
-; CHECK-LE-P9-NEXT: mffprwz r3, f0
-; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT: blr
;
@@ -283,9 +255,7 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
;
; CHECK-BE-P8-LABEL: test4:
; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: xscvdpuxws f0, f1
-; CHECK-BE-P8-NEXT: mffprwz r3, f0
-; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: xscvdpuxws v3, f1
; CHECK-BE-P8-NEXT: vmrghw v3, v2, v3
; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3
; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1
@@ -294,8 +264,6 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
; CHECK-BE-P9-LABEL: test4:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: xscvdpuxws f0, f1
-; CHECK-BE-P9-NEXT: mffprwz r3, f0
-; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
index caf483e45b602..242e9966827a2 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxswapd vs0, v2
-; CHECK-P8-NEXT: xscvdpuxws f1, v2
-; CHECK-P8-NEXT: xscvdpuxws f0, f0
-; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: mtvsrwz v2, r3
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: mtvsrwz v3, r4
+; CHECK-P8-NEXT: xscvdpuxws v2, v2
+; CHECK-P8-NEXT: xscvdpuxws v3, f0
; CHECK-P8-NEXT: vmrghw v2, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
@@ -26,26 +22,18 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: xscvdpuxws f0, v2
-; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: mtvsrwz v3, r3
-; CHECK-P9-NEXT: xscvdpuxws f0, f0
-; CHECK-P9-NEXT: mffprwz r3, f0
-; CHECK-P9-NEXT: mtvsrwz v2, r3
+; CHECK-P9-NEXT: xscvdpuxws v3, v2
+; CHECK-P9-NEXT: xscvdpuxws v2, f0
; CHECK-P9-NEXT: vmrghw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xscvdpuxws f0, v2
-; CHECK-BE-NEXT: mffprwz r3, f0
; CHECK-BE-NEXT: xxswapd vs0, v2
-; CHECK-BE-NEXT: mtvsrwz v3, r3
-; CHECK-BE-NEXT: xscvdpuxws f0, f0
-; CHECK-BE-NEXT: mffprwz r3, f0
-; CHECK-BE-NEXT: mtvsrwz v2, r3
+; CHECK-BE-NEXT: xscvdpuxws v3, v2
+; CHECK-BE-NEXT: xscvdpuxws v2, f0
; CHECK-BE-NEXT: vmrgow v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
@@ -305,12 +293,8 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxswapd vs0, v2
-; CHECK-P8-NEXT: xscvdpsxws f1, v2
-; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: mtvsrwz v2, r3
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: mtvsrwz v3, r4
+; CHECK-P8-NEXT: xscvdpsxws v2, v2
+; CHECK-P8-NEXT: xscvdpsxws v3, f0
; CHECK-P8-NEXT: vmrghw v2, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
@@ -318,26 +302,18 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: xscvdpsxws f0, v2
-; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: mtvsrwz v3, r3
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: mffprwz r3, f0
-; CHECK-P9-NEXT: mtvsrwz v2, r3
+; CHECK-P9-NEXT: xscvdpsxws v3, v2
+; CHECK-P9-NEXT: xscvdpsxws v2, f0
; CHECK-P9-NEXT: vmrghw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xscvdpsxws f0, v2
-; CHECK-BE-NEXT: mffprwz r3, f0
; CHECK-BE-NEXT: xxswapd vs0, v2
-; CHECK-BE-NEXT: mtvsrwz v3, r3
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: mffprwz r3, f0
-; CHECK-BE-NEXT: mtvsrwz v2, r3
+; CHECK-BE-NEXT: xscvdpsxws v3, v2
+; CHECK-BE-NEXT: xscvdpsxws v2, f0
; CHECK-BE-NEXT: vmrgow v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
More information about the llvm-commits
mailing list