[llvm] ffbffaf - [PowerPC] Improve codegen for int-to-fp conversion of subword vector extract
Albion Fung via llvm-commits
llvm-commits at lists.llvm.org
Tue May 11 13:00:25 PDT 2021
Author: Albion Fung
Date: 2021-05-11T15:00:11-05:00
New Revision: ffbffaf6b6b0fc06abb7b43ec8de8bc61d941bc7
URL: https://github.com/llvm/llvm-project/commit/ffbffaf6b6b0fc06abb7b43ec8de8bc61d941bc7
DIFF: https://github.com/llvm/llvm-project/commit/ffbffaf6b6b0fc06abb7b43ec8de8bc61d941bc7.diff
LOG: [PowerPC] Improve codegen for int-to-fp conversion of subword vector extract
When an integer is converted into floating point in subword vector extract,
it can be done in 2 instructions instead of the 3+ instructions it generates
right now. This patch removes the unnecessary instructions.
Differential: https://reviews.llvm.org/D100604
Added:
llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll
Modified:
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 95cf5ba95b13..66a1bc460865 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -4214,7 +4214,7 @@ foreach Idx = 0-3 in {
(f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
}
-// (Un)Signed HWord vector extract -> QP
+// (Un)Signed HWord vector extract -> QP/DP/SP
foreach Idx = 0-7 in {
def : Pat<(f128 (sint_to_fp
(i32 (sext_inreg
@@ -4227,6 +4227,31 @@ foreach Idx = 0-7 in {
(and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
(f128 (XSCVUDQP (EXTRACT_SUBREG
(VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+  // DP/SP: halfword element Idx sits at byte offset 2*Idx (!add(Idx, Idx)).
+  // Unsigned halfword -> SP.
+  def : Pat<
+    (f32 (PPCfcfidus (f64 (PPCmtvsrz (and
+      (i32 (vector_extract v8i16:$src, Idx)), 65535))))),
+    (f32 (XSCVUXDSP (EXTRACT_SUBREG
+      (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+  // Signed halfword -> SP; VEXTSH2D sign-extends the extracted halfword.
+  def : Pat<
+    (f32 (PPCfcfids (f64 (PPCmtvsra (i32 (sext_inreg
+      (vector_extract v8i16:$src, Idx), i16)))))),
+    (f32 (XSCVSXDSP (EXTRACT_SUBREG
+      (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)), sub_64)))>;
+  // Unsigned halfword -> DP.
+  def : Pat<
+    (f64 (PPCfcfidu (f64 (PPCmtvsrz (and
+      (i32 (vector_extract v8i16:$src, Idx)), 65535))))),
+    (f64 (XSCVUXDDP (EXTRACT_SUBREG
+      (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+  // Signed halfword -> DP; VEXTSH2D sign-extends the extracted halfword.
+  def : Pat<
+    (f64 (PPCfcfid (f64 (PPCmtvsra (i32 (sext_inreg
+      (vector_extract v8i16:$src, Idx), i16)))))),
+    (f64 (XSCVSXDDP (EXTRACT_SUBREG
+      (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)), sub_64)))>;
}
// (Un)Signed Byte vector extract -> QP
@@ -4240,6 +4265,33 @@ foreach Idx = 0-15 in {
(and (i32 (vector_extract v16i8:$src, Idx)), 255))),
(f128 (XSCVUDQP
(EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
+
+  // DP/SP: byte element Idx sits at byte offset Idx. VEXTRACTUB zero-extends
+  // the byte, so signed conversions sign-extend from the byte (VEXTSB2D).
+  // Unsigned byte -> SP.
+  def : Pat<
+    (f32 (PPCfcfidus (f64 (PPCmtvsrz (and
+      (i32 (vector_extract v16i8:$src, Idx)), 255))))),
+    (f32 (XSCVUXDSP (EXTRACT_SUBREG
+      (VEXTRACTUB Idx, $src), sub_64)))>;
+  // Signed byte -> SP.
+  def : Pat<
+    (f32 (PPCfcfids (f64 (PPCmtvsra (i32 (sext_inreg
+      (vector_extract v16i8:$src, Idx), i8)))))),
+    (f32 (XSCVSXDSP (EXTRACT_SUBREG
+      (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
+  // Unsigned byte -> DP.
+  def : Pat<
+    (f64 (PPCfcfidu (f64 (PPCmtvsrz (and
+      (i32 (vector_extract v16i8:$src, Idx)), 255))))),
+    (f64 (XSCVUXDDP (EXTRACT_SUBREG
+      (VEXTRACTUB Idx, $src), sub_64)))>;
+  // Signed byte -> DP.
+  def : Pat<
+    (f64 (PPCfcfid (f64 (PPCmtvsra (i32 (sext_inreg
+      (vector_extract v16i8:$src, Idx), i8)))))),
+    (f64 (XSCVSXDDP (EXTRACT_SUBREG
+      (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
}
// Unsiged int in vsx register -> QP
@@ -4410,7 +4462,7 @@ foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
(f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
}
-// (Un)Signed HWord vector extract -> QP
+// (Un)Signed HWord vector extract -> QP/DP/SP
// The Nested foreach lists identifies the vector element and corresponding
// register byte location.
foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
@@ -4426,9 +4478,37 @@ foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
65535))),
(f128 (XSCVUDQP (EXTRACT_SUBREG
(VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+  // DP/SP: Idx is an [element, byte offset] pair; !head(Idx) selects the
+  // vector element matched by the pattern and !head(!tail(Idx)) supplies the
+  // byte offset passed to VEXTRACTUH. Signed conversions sign-extend the
+  // extracted halfword with VEXTSH2D first.
+  // Unsigned halfword -> SP.
+  def : Pat<
+    (f32 (PPCfcfidus (f64 (PPCmtvsrz (and
+      (i32 (vector_extract v8i16:$src, !head(Idx))), 65535))))),
+    (f32 (XSCVUXDSP (EXTRACT_SUBREG
+      (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+  // Signed halfword -> SP.
+  def : Pat<
+    (f32 (PPCfcfids (f64 (PPCmtvsra (i32 (sext_inreg
+      (vector_extract v8i16:$src, !head(Idx)), i16)))))),
+    (f32 (XSCVSXDSP (EXTRACT_SUBREG
+      (VEXTSH2D (VEXTRACTUH !head(!tail(Idx)), $src)), sub_64)))>;
+  // Unsigned halfword -> DP.
+  def : Pat<
+    (f64 (PPCfcfidu (f64 (PPCmtvsrz (and
+      (i32 (vector_extract v8i16:$src, !head(Idx))), 65535))))),
+    (f64 (XSCVUXDDP (EXTRACT_SUBREG
+      (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+  // Signed halfword -> DP.
+  def : Pat<
+    (f64 (PPCfcfid (f64 (PPCmtvsra (i32 (sext_inreg
+      (vector_extract v8i16:$src, !head(Idx)), i16)))))),
+    (f64 (XSCVSXDDP (EXTRACT_SUBREG
+      (VEXTSH2D (VEXTRACTUH !head(!tail(Idx)), $src)), sub_64)))>;
}
-// (Un)Signed Byte vector extract -> QP
+// (Un)Signed Byte vector extract -> QP/DP/SP
foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
[9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
def : Pat<(f128 (sint_to_fp
@@ -4444,6 +4524,44 @@ foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
(f128 (XSCVUDQP
(EXTRACT_SUBREG
(VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+
+  // DP/SP: Idx is an [element, byte offset] pair; !head(Idx) is the vector
+  // element matched and !head(!tail(Idx)) is the byte offset for VEXTRACTUB.
+  // VEXTRACTUB zero-extends the byte into doubleword 0, so unsigned
+  // conversions use it directly, while signed conversions must sign-extend
+  // from the byte (VEXTSB2D), not from the halfword. There is deliberately no
+  // PPCfcfidu pattern over a sign-extended (PPCmtvsra) source: selecting the
+  // signed XSCVSXDDP for it would change the result for negative bytes.
+  // Unsigned byte -> SP.
+  def : Pat<
+    (f32 (PPCfcfidus
+      (f64 (PPCmtvsrz
+        (and (i32 (vector_extract v16i8:$src, !head(Idx))), 255))))),
+    (f32 (XSCVUXDSP (EXTRACT_SUBREG
+      (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+  // Signed byte -> SP.
+  def : Pat<
+    (f32 (PPCfcfids
+      (f64 (PPCmtvsra
+        (i32 (sext_inreg
+          (vector_extract v16i8:$src, !head(Idx)), i8)))))),
+    (f32 (XSCVSXDSP (EXTRACT_SUBREG
+      (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)), sub_64)))>;
+  // Unsigned byte -> DP.
+  def : Pat<
+    (f64 (PPCfcfidu
+      (f64 (PPCmtvsrz
+        (and (i32 (vector_extract v16i8:$src, !head(Idx))), 255))))),
+    (f64 (XSCVUXDDP (EXTRACT_SUBREG
+      (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+  // Signed byte -> DP.
+  def : Pat<
+    (f64 (PPCfcfid
+      (f64 (PPCmtvsra
+        (i32 (sext_inreg
+          (vector_extract v16i8:$src, !head(Idx)), i8)))))),
+    (f64 (XSCVSXDDP (EXTRACT_SUBREG
+      (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)), sub_64)))>;
}
// Unsiged int in vsx register -> QP
diff --git a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
index c04f0ff35f70..cc218d15987f 100644
--- a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
+++ b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
@@ -14,31 +14,19 @@
define dso_local <2 x double> @test1(<8 x i16> %a) {
; P9BE-LABEL: test1:
; P9BE: # %bb.0: # %entry
-; P9BE-NEXT: li r3, 0
-; P9BE-NEXT: vextuhlx r3, r3, v2
-; P9BE-NEXT: clrlwi r3, r3, 16
-; P9BE-NEXT: mtfprwz f0, r3
-; P9BE-NEXT: li r3, 2
-; P9BE-NEXT: vextuhlx r3, r3, v2
-; P9BE-NEXT: xscvuxddp f0, f0
-; P9BE-NEXT: clrlwi r3, r3, 16
-; P9BE-NEXT: mtfprwz f1, r3
-; P9BE-NEXT: xscvuxddp f1, f1
+; P9BE-NEXT: vextractuh v3, v2, 0
+; P9BE-NEXT: vextractuh v2, v2, 2
+; P9BE-NEXT: xscvuxddp f0, v3
+; P9BE-NEXT: xscvuxddp f1, v2
; P9BE-NEXT: xxmrghd v2, vs0, vs1
; P9BE-NEXT: blr
;
; P9LE-LABEL: test1:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: li r3, 0
-; P9LE-NEXT: vextuhrx r3, r3, v2
-; P9LE-NEXT: clrlwi r3, r3, 16
-; P9LE-NEXT: mtfprwz f0, r3
-; P9LE-NEXT: li r3, 2
-; P9LE-NEXT: vextuhrx r3, r3, v2
-; P9LE-NEXT: xscvuxddp f0, f0
-; P9LE-NEXT: clrlwi r3, r3, 16
-; P9LE-NEXT: mtfprwz f1, r3
-; P9LE-NEXT: xscvuxddp f1, f1
+; P9LE-NEXT: vextractuh v3, v2, 14
+; P9LE-NEXT: vextractuh v2, v2, 12
+; P9LE-NEXT: xscvuxddp f0, v3
+; P9LE-NEXT: xscvuxddp f1, v2
; P9LE-NEXT: xxmrghd v2, vs1, vs0
; P9LE-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll
new file mode 100644
index 000000000000..29af49815dc7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -ppc-vsr-nums-as-vr \
+; RUN: -relocation-model=pic -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN: < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN: < %s | FileCheck %s -check-prefix=CHECK-BE
+
+define dso_local void @testutof(<8 x i16> %a, float* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testutof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractuh v2, v2, 14
+; CHECK-NEXT:    xscvuxdsp f0, v2
+; CHECK-NEXT:    stfs f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testutof:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v2
+; CHECK-BE-NEXT:    stfs f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %elt = extractelement <8 x i16> %a, i32 0
+  %fp = uitofp i16 %elt to float
+  store float %fp, float* %ptr, align 4
+  ret void
+}
+
+define dso_local void @testutod(<8 x i16> %a, double* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testutod:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractuh v2, v2, 14
+; CHECK-NEXT:    xscvuxddp f0, v2
+; CHECK-NEXT:    stfd f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testutod:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxddp f0, v2
+; CHECK-BE-NEXT:    stfd f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %elt = extractelement <8 x i16> %a, i32 0
+  %fp = uitofp i16 %elt to double
+  store double %fp, double* %ptr, align 8
+  ret void
+}
+
+define dso_local void @teststof(<8 x i16> %a, float* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: teststof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractuh v2, v2, 14
+; CHECK-NEXT:    vextsh2d v2, v2
+; CHECK-NEXT:    xscvsxdsp f0, v2
+; CHECK-NEXT:    stfs f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: teststof:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xscvsxdsp f0, v2
+; CHECK-BE-NEXT:    stfs f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %elt = extractelement <8 x i16> %a, i32 0
+  %fp = sitofp i16 %elt to float
+  store float %fp, float* %ptr, align 4
+  ret void
+}
+
+define dso_local void @teststod(<8 x i16> %a, double* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: teststod:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractuh v2, v2, 14
+; CHECK-NEXT:    vextsh2d v2, v2
+; CHECK-NEXT:    xscvsxddp f0, v2
+; CHECK-NEXT:    stfd f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: teststod:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xscvsxddp f0, v2
+; CHECK-BE-NEXT:    stfd f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %elt = extractelement <8 x i16> %a, i32 0
+  %fp = sitofp i16 %elt to double
+  store double %fp, double* %ptr, align 8
+  ret void
+}
+
+define dso_local void @testsubtod(<16 x i8> %a, double* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testsubtod:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractub v2, v2, 15
+; CHECK-NEXT:    xscvuxddp f0, v2
+; CHECK-NEXT:    stfd f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testsubtod:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxddp f0, v2
+; CHECK-BE-NEXT:    stfd f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %elt = extractelement <16 x i8> %a, i32 0
+  %fp = uitofp i8 %elt to double
+  store double %fp, double* %ptr, align 8
+  ret void
+}
+
+define dso_local void @testsbtod(<16 x i8> %a, double* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testsbtod:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractub v2, v2, 15
+; CHECK-NEXT:    vextsb2d v2, v2
+; CHECK-NEXT:    xscvsxddp f0, v2
+; CHECK-NEXT:    stfd f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testsbtod:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xscvsxddp f0, v2
+; CHECK-BE-NEXT:    stfd f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sitofp i8 %vecext to double
+  store double %conv, double* %ptr, align 8
+  ret void
+}
+
+define dso_local void @testsubtof(<16 x i8> %a, float* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testsubtof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractub v2, v2, 15
+; CHECK-NEXT:    xscvuxdsp f0, v2
+; CHECK-NEXT:    stfs f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testsubtof:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v2
+; CHECK-BE-NEXT:    stfs f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = uitofp i8 %vecext to float
+  store float %conv, float* %ptr, align 4
+  ret void
+}
+
+define dso_local void @testsbtof(<16 x i8> %a, float* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testsbtof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractub v2, v2, 15
+; CHECK-NEXT:    vextsb2d v2, v2
+; CHECK-NEXT:    xscvsxdsp f0, v2
+; CHECK-NEXT:    stfs f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testsbtof:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xscvsxdsp f0, v2
+; CHECK-BE-NEXT:    stfs f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sitofp i8 %vecext to float
+  store float %conv, float* %ptr, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index e73fa39dea34..8f68b94076a6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -32,17 +32,11 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrws v2, r3
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: vextuhrx r3, r3, v2
-; CHECK-P9-NEXT: clrlwi r3, r3, 16
-; CHECK-P9-NEXT: mtfprwz f0, r3
-; CHECK-P9-NEXT: li r3, 2
-; CHECK-P9-NEXT: xscvuxdsp f0, f0
-; CHECK-P9-NEXT: vextuhrx r3, r3, v2
-; CHECK-P9-NEXT: clrlwi r3, r3, 16
+; CHECK-P9-NEXT: vextractuh v3, v2, 14
+; CHECK-P9-NEXT: vextractuh v2, v2, 12
+; CHECK-P9-NEXT: xscvuxdsp f0, v3
; CHECK-P9-NEXT: xscvdpspn v3, f0
-; CHECK-P9-NEXT: mtfprwz f0, r3
-; CHECK-P9-NEXT: xscvuxdsp f0, f0
+; CHECK-P9-NEXT: xscvuxdsp f0, v2
; CHECK-P9-NEXT: xscvdpspn v2, f0
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
@@ -51,17 +45,11 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrws v2, r3
-; CHECK-BE-NEXT: li r3, 2
-; CHECK-BE-NEXT: vextuhlx r3, r3, v2
-; CHECK-BE-NEXT: clrlwi r3, r3, 16
-; CHECK-BE-NEXT: mtfprwz f0, r3
-; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: xscvuxdsp f0, f0
-; CHECK-BE-NEXT: vextuhlx r3, r3, v2
-; CHECK-BE-NEXT: clrlwi r3, r3, 16
+; CHECK-BE-NEXT: vextractuh v3, v2, 2
+; CHECK-BE-NEXT: vextractuh v2, v2, 0
+; CHECK-BE-NEXT: xscvuxdsp f0, v3
; CHECK-BE-NEXT: xscvdpspn v3, f0
-; CHECK-BE-NEXT: mtfprwz f0, r3
-; CHECK-BE-NEXT: xscvuxdsp f0, f0
+; CHECK-BE-NEXT: xscvuxdsp f0, v2
; CHECK-BE-NEXT: xscvdpspn v2, f0
; CHECK-BE-NEXT: vmrgow v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
@@ -251,17 +239,13 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrws v2, r3
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: vextuhrx r3, r3, v2
-; CHECK-P9-NEXT: extsh r3, r3
-; CHECK-P9-NEXT: mtfprwa f0, r3
-; CHECK-P9-NEXT: li r3, 2
-; CHECK-P9-NEXT: xscvsxdsp f0, f0
-; CHECK-P9-NEXT: vextuhrx r3, r3, v2
-; CHECK-P9-NEXT: extsh r3, r3
+; CHECK-P9-NEXT: vextractuh v3, v2, 14
+; CHECK-P9-NEXT: vextractuh v2, v2, 12
+; CHECK-P9-NEXT: vextsh2d v3, v3
+; CHECK-P9-NEXT: vextsh2d v2, v2
+; CHECK-P9-NEXT: xscvsxdsp f0, v3
; CHECK-P9-NEXT: xscvdpspn v3, f0
-; CHECK-P9-NEXT: mtfprwa f0, r3
-; CHECK-P9-NEXT: xscvsxdsp f0, f0
+; CHECK-P9-NEXT: xscvsxdsp f0, v2
; CHECK-P9-NEXT: xscvdpspn v2, f0
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
@@ -270,17 +254,13 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrws v2, r3
-; CHECK-BE-NEXT: li r3, 2
-; CHECK-BE-NEXT: vextuhlx r3, r3, v2
-; CHECK-BE-NEXT: extsh r3, r3
-; CHECK-BE-NEXT: mtfprwa f0, r3
-; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: xscvsxdsp f0, f0
-; CHECK-BE-NEXT: vextuhlx r3, r3, v2
-; CHECK-BE-NEXT: extsh r3, r3
+; CHECK-BE-NEXT: vextractuh v3, v2, 2
+; CHECK-BE-NEXT: vextractuh v2, v2, 0
+; CHECK-BE-NEXT: vextsh2d v3, v3
+; CHECK-BE-NEXT: vextsh2d v2, v2
+; CHECK-BE-NEXT: xscvsxdsp f0, v3
; CHECK-BE-NEXT: xscvdpspn v3, f0
-; CHECK-BE-NEXT: mtfprwa f0, r3
-; CHECK-BE-NEXT: xscvsxdsp f0, f0
+; CHECK-BE-NEXT: xscvsxdsp f0, v2
; CHECK-BE-NEXT: xscvdpspn v2, f0
; CHECK-BE-NEXT: vmrgow v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index f9ab5e1f60bf..792565007b57 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -32,17 +32,11 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrws v2, r3
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: vextubrx r3, r3, v2
-; CHECK-P9-NEXT: clrlwi r3, r3, 24
-; CHECK-P9-NEXT: mtfprwz f0, r3
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: xscvuxdsp f0, f0
-; CHECK-P9-NEXT: vextubrx r3, r3, v2
-; CHECK-P9-NEXT: clrlwi r3, r3, 24
+; CHECK-P9-NEXT: vextractub v3, v2, 15
+; CHECK-P9-NEXT: vextractub v2, v2, 14
+; CHECK-P9-NEXT: xscvuxdsp f0, v3
; CHECK-P9-NEXT: xscvdpspn v3, f0
-; CHECK-P9-NEXT: mtfprwz f0, r3
-; CHECK-P9-NEXT: xscvuxdsp f0, f0
+; CHECK-P9-NEXT: xscvuxdsp f0, v2
; CHECK-P9-NEXT: xscvdpspn v2, f0
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
@@ -51,17 +45,11 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrws v2, r3
-; CHECK-BE-NEXT: li r3, 1
-; CHECK-BE-NEXT: vextublx r3, r3, v2
-; CHECK-BE-NEXT: clrlwi r3, r3, 24
-; CHECK-BE-NEXT: mtfprwz f0, r3
-; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: xscvuxdsp f0, f0
-; CHECK-BE-NEXT: vextublx r3, r3, v2
-; CHECK-BE-NEXT: clrlwi r3, r3, 24
+; CHECK-BE-NEXT:    vextractub v3, v2, 1
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v3
; CHECK-BE-NEXT: xscvdpspn v3, f0
-; CHECK-BE-NEXT: mtfprwz f0, r3
-; CHECK-BE-NEXT: xscvuxdsp f0, f0
+; CHECK-BE-NEXT: xscvuxdsp f0, v2
; CHECK-BE-NEXT: xscvdpspn v2, f0
; CHECK-BE-NEXT: vmrgow v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
@@ -293,17 +281,13 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrws v2, r3
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: vextubrx r3, r3, v2
-; CHECK-P9-NEXT: extsb r3, r3
-; CHECK-P9-NEXT: mtfprwa f0, r3
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: xscvsxdsp f0, f0
-; CHECK-P9-NEXT: vextubrx r3, r3, v2
-; CHECK-P9-NEXT: extsb r3, r3
+; CHECK-P9-NEXT:    vextractub v3, v2, 15
+; CHECK-P9-NEXT:    vextractub v2, v2, 14
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    xscvsxdsp f0, v3
; CHECK-P9-NEXT: xscvdpspn v3, f0
-; CHECK-P9-NEXT: mtfprwa f0, r3
-; CHECK-P9-NEXT: xscvsxdsp f0, f0
+; CHECK-P9-NEXT: xscvsxdsp f0, v2
; CHECK-P9-NEXT: xscvdpspn v2, f0
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
@@ -312,17 +296,13 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrws v2, r3
-; CHECK-BE-NEXT: li r3, 1
-; CHECK-BE-NEXT: vextublx r3, r3, v2
-; CHECK-BE-NEXT: extsb r3, r3
-; CHECK-BE-NEXT: mtfprwa f0, r3
-; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: xscvsxdsp f0, f0
-; CHECK-BE-NEXT: vextublx r3, r3, v2
-; CHECK-BE-NEXT: extsb r3, r3
+; CHECK-BE-NEXT:    vextractub v3, v2, 1
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xscvsxdsp f0, v3
; CHECK-BE-NEXT: xscvdpspn v3, f0
-; CHECK-BE-NEXT: mtfprwa f0, r3
-; CHECK-BE-NEXT: xscvsxdsp f0, f0
+; CHECK-BE-NEXT: xscvsxdsp f0, v2
; CHECK-BE-NEXT: xscvdpspn v2, f0
; CHECK-BE-NEXT: vmrgow v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
More information about the llvm-commits
mailing list