[llvm] ffbffaf - [PowerPC] Improve codegen for int-to-fp conversion of subword vector extract

Albion Fung via llvm-commits llvm-commits at lists.llvm.org
Tue May 11 13:00:25 PDT 2021


Author: Albion Fung
Date: 2021-05-11T15:00:11-05:00
New Revision: ffbffaf6b6b0fc06abb7b43ec8de8bc61d941bc7

URL: https://github.com/llvm/llvm-project/commit/ffbffaf6b6b0fc06abb7b43ec8de8bc61d941bc7
DIFF: https://github.com/llvm/llvm-project/commit/ffbffaf6b6b0fc06abb7b43ec8de8bc61d941bc7.diff

LOG: [PowerPC] Improve codegen for int-to-fp conversion of subword vector extract

When an integer extracted from a subword vector element is converted to
floating point, it can be done in 2 instructions instead of the 3+ instructions
generated right now. This patch removes the unnecessary instructions.

Differential: https://reviews.llvm.org/D100604

Added: 
    llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 95cf5ba95b13..66a1bc460865 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -4214,7 +4214,7 @@ foreach Idx = 0-3 in {
             (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
 }
 
-// (Un)Signed HWord vector extract -> QP
+// (Un)Signed HWord vector extract -> QP/DP/SP
 foreach Idx = 0-7 in {
   def : Pat<(f128 (sint_to_fp
                     (i32 (sext_inreg
@@ -4227,6 +4227,31 @@ foreach Idx = 0-7 in {
                     (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
             (f128 (XSCVUDQP (EXTRACT_SUBREG
                               (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+  def : Pat<(f32 (PPCfcfidus
+                   (f64 (PPCmtvsrz (and (i32 (vector_extract v8i16:$src, Idx)),
+                                        65535))))),
+            (f32 (XSCVUXDSP (EXTRACT_SUBREG
+                              (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+  def : Pat<(f32 (PPCfcfids
+                   (f64 (PPCmtvsra
+                          (i32 (sext_inreg (vector_extract v8i16:$src, Idx),
+                               i16)))))),
+          (f32 (XSCVSXDSP (EXTRACT_SUBREG
+                            (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
+                            sub_64)))>;
+  def : Pat<(f64 (PPCfcfidu
+                   (f64 (PPCmtvsrz
+                          (and (i32 (vector_extract v8i16:$src, Idx)),
+                               65535))))),
+            (f64 (XSCVUXDDP (EXTRACT_SUBREG
+                              (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+  def : Pat<(f64 (PPCfcfid
+                   (f64 (PPCmtvsra
+                          (i32 (sext_inreg (vector_extract v8i16:$src, Idx),
+                               i16)))))),
+          (f64 (XSCVSXDDP (EXTRACT_SUBREG
+                            (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
+                            sub_64)))>;
 }
 
 // (Un)Signed Byte vector extract -> QP
@@ -4240,6 +4265,33 @@ foreach Idx = 0-15 in {
                     (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
             (f128 (XSCVUDQP
                     (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
+
+  def : Pat<(f32 (PPCfcfidus  // u8 -> f32; byte offset is Idx, not 2*Idx
+                   (f64 (PPCmtvsrz
+                          (and (i32 (vector_extract v16i8:$src, Idx)),
+                               255))))),
+            (f32 (XSCVUXDSP (EXTRACT_SUBREG
+                              (VEXTRACTUB Idx, $src), sub_64)))>;
+  def : Pat<(f32 (PPCfcfids   // i8 -> f32; sign-extend from the byte
+                   (f64 (PPCmtvsra
+                          (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
+                               i8)))))),
+          (f32 (XSCVSXDSP (EXTRACT_SUBREG
+                            (VEXTSB2D (VEXTRACTUB Idx, $src)),
+                            sub_64)))>;
+  def : Pat<(f64 (PPCfcfidu   // u8 -> f64; byte offset is Idx, not 2*Idx
+                   (f64 (PPCmtvsrz
+                          (and (i32 (vector_extract v16i8:$src, Idx)),
+                          255))))),
+            (f64 (XSCVUXDDP (EXTRACT_SUBREG
+                              (VEXTRACTUB Idx, $src), sub_64)))>;
+  def : Pat<(f64 (PPCfcfid    // i8 -> f64; sign-extend from the byte
+                   (f64 (PPCmtvsra
+                          (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
+                               i8)))))),
+          (f64 (XSCVSXDDP (EXTRACT_SUBREG
+                            (VEXTSB2D (VEXTRACTUB Idx, $src)),
+                            sub_64)))>;
 }
 
 // Unsiged int in vsx register -> QP
@@ -4410,7 +4462,7 @@ foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
             (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
 }
 
-// (Un)Signed HWord vector extract -> QP
+// (Un)Signed HWord vector extract -> QP/DP/SP
 // The Nested foreach lists identifies the vector element and corresponding
 // register byte location.
 foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
@@ -4426,9 +4478,37 @@ foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
                          65535))),
             (f128 (XSCVUDQP (EXTRACT_SUBREG
                               (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+  def : Pat<(f32 (PPCfcfidus
+                   (f64 (PPCmtvsrz
+                          (and (i32 (vector_extract v8i16:$src, !head(Idx))),
+                          65535))))),
+            (f32 (XSCVUXDSP (EXTRACT_SUBREG
+                              (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+  def : Pat<(f32 (PPCfcfids
+                   (f64 (PPCmtvsra
+                          (i32 (sext_inreg (vector_extract v8i16:$src,
+                                           !head(Idx)), i16)))))),
+            (f32 (XSCVSXDSP
+                    (EXTRACT_SUBREG
+                     (VEXTSH2D (VEXTRACTUH !head(!tail(Idx)), $src)),
+                     sub_64)))>;
+  def : Pat<(f64 (PPCfcfidu
+                   (f64 (PPCmtvsrz
+                          (and (i32 (vector_extract v8i16:$src, !head(Idx))),
+                          65535))))),
+            (f64 (XSCVUXDDP (EXTRACT_SUBREG
+                              (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+  def : Pat<(f64 (PPCfcfid
+                   (f64 (PPCmtvsra
+                        (i32 (sext_inreg
+                            (vector_extract v8i16:$src, !head(Idx)), i16)))))),
+            (f64 (XSCVSXDDP
+                    (EXTRACT_SUBREG (VEXTSH2D
+                                      (VEXTRACTUH !head(!tail(Idx)), $src)),
+                                    sub_64)))>;
 }
 
-// (Un)Signed Byte vector extract -> QP
+// (Un)Signed Byte vector extract -> QP/DP/SP
 foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
                [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
   def : Pat<(f128 (sint_to_fp
@@ -4444,6 +4524,39 @@ foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
             (f128 (XSCVUDQP
                     (EXTRACT_SUBREG
                       (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+
+  // Element !head(Idx) sits at register byte !head(!tail(Idx)). Unsigned
+  // conversions consume the zero-extended VEXTRACTUB result directly; signed
+  // conversions first sign-extend that byte to a doubleword with VEXTSB2D
+  // (VEXTSH2D would treat the zero-extended byte as a non-negative halfword).
+  def : Pat<(f32 (PPCfcfidus
+                   (f64 (PPCmtvsrz
+                          (and (i32 (vector_extract v16i8:$src, !head(Idx))),
+                          255))))),
+            (f32 (XSCVUXDSP (EXTRACT_SUBREG
+                              (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+  def : Pat<(f32 (PPCfcfids
+                   (f64 (PPCmtvsra
+                          (i32 (sext_inreg
+                            (vector_extract v16i8:$src, !head(Idx)), i8)))))),
+            (f32 (XSCVSXDSP
+                    (EXTRACT_SUBREG (VEXTSB2D
+                                      (VEXTRACTUB !head(!tail(Idx)), $src)),
+                                    sub_64)))>;
+  def : Pat<(f64 (PPCfcfidu
+                   (f64 (PPCmtvsrz
+                          (and (i32
+                            (vector_extract v16i8:$src, !head(Idx))), 255))))),
+            (f64 (XSCVUXDDP (EXTRACT_SUBREG
+                              (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+  def : Pat<(f64 (PPCfcfid
+                   (f64 (PPCmtvsra
+                        (i32 (sext_inreg
+                          (vector_extract v16i8:$src, !head(Idx)), i8)))))),
+            (f64 (XSCVSXDDP
+                    (EXTRACT_SUBREG (VEXTSB2D
+                                      (VEXTRACTUB !head(!tail(Idx)), $src)),
+                                    sub_64)))>;
 }
 
 // Unsiged int in vsx register -> QP

diff  --git a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
index c04f0ff35f70..cc218d15987f 100644
--- a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
+++ b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
@@ -14,31 +14,19 @@
 define dso_local <2 x double> @test1(<8 x i16> %a) {
 ; P9BE-LABEL: test1:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 2
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    xscvuxddp f0, f0
-; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    xscvuxddp f1, f1
+; P9BE-NEXT:    vextractuh v3, v2, 0
+; P9BE-NEXT:    vextractuh v2, v2, 2
+; P9BE-NEXT:    xscvuxddp f0, v3
+; P9BE-NEXT:    xscvuxddp f1, v2
 ; P9BE-NEXT:    xxmrghd v2, vs0, vs1
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: test1:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    li r3, 0
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mtfprwz f0, r3
-; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    xscvuxddp f0, f0
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mtfprwz f1, r3
-; P9LE-NEXT:    xscvuxddp f1, f1
+; P9LE-NEXT:    vextractuh v3, v2, 14
+; P9LE-NEXT:    vextractuh v2, v2, 12
+; P9LE-NEXT:    xscvuxddp f0, v3
+; P9LE-NEXT:    xscvuxddp f1, v2
 ; P9LE-NEXT:    xxmrghd v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll
new file mode 100644
index 000000000000..29af49815dc7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -ppc-vsr-nums-as-vr \
+; RUN:   -relocation-model=pic -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   < %s | FileCheck %s -check-prefix=CHECK-BE
+
+define dso_local void @testutof(<8 x i16> %a, float* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testutof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractuh v2, v2, 14
+; CHECK-NEXT:    xscvuxdsp f0, v2
+; CHECK-NEXT:    stfs f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testutof:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v2
+; CHECK-BE-NEXT:    stfs f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = uitofp i16 %vecext to float
+  store float %conv, float* %ptr, align 4
+  ret void
+}
+
+define dso_local void @testutod(<8 x i16> %a, double* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testutod:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractuh v2, v2, 14
+; CHECK-NEXT:    xscvuxddp f0, v2
+; CHECK-NEXT:    stfd f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testutod:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxddp f0, v2
+; CHECK-BE-NEXT:    stfd f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = uitofp i16 %vecext to double
+  store double %conv, double* %ptr, align 8
+  ret void
+}
+
+define dso_local void @teststof(<8 x i16> %a, float* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: teststof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractuh v2, v2, 14
+; CHECK-NEXT:    vextsh2d v2, v2
+; CHECK-NEXT:    xscvsxdsp f0, v2
+; CHECK-NEXT:    stfs f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: teststof:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xscvsxdsp f0, v2
+; CHECK-BE-NEXT:    stfs f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sitofp i16 %vecext to float
+  store float %conv, float* %ptr, align 4
+  ret void
+}
+
+define dso_local void @teststod(<8 x i16> %a, double* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: teststod:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractuh v2, v2, 14
+; CHECK-NEXT:    vextsh2d v2, v2
+; CHECK-NEXT:    xscvsxddp f0, v2
+; CHECK-NEXT:    stfd f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: teststod:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xscvsxddp f0, v2
+; CHECK-BE-NEXT:    stfd f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sitofp i16 %vecext to double
+  store double %conv, double* %ptr, align 8
+  ret void
+}
+
+define dso_local void @testsubtod(<16 x i8> %a, double* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testsubtod:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractub v2, v2, 15
+; CHECK-NEXT:    xscvuxddp f0, v2
+; CHECK-NEXT:    stfd f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testsubtod:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxddp f0, v2
+; CHECK-BE-NEXT:    stfd f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = uitofp i8 %vecext to double
+  store double %conv, double* %ptr, align 8
+  ret void
+}
+
+define dso_local void @testsbtod(<16 x i8> %a, double* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testsbtod:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractub v2, v2, 15
+; CHECK-NEXT:    vextsb2d v2, v2
+; CHECK-NEXT:    xscvsxddp f0, v2
+; CHECK-NEXT:    stfd f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testsbtod:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xscvsxddp f0, v2
+; CHECK-BE-NEXT:    stfd f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sitofp i8 %vecext to double
+  store double %conv, double* %ptr, align 8
+  ret void
+}
+
+define dso_local void @testsubtof(<16 x i8> %a, float* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testsubtof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractub v2, v2, 15
+; CHECK-NEXT:    xscvuxdsp f0, v2
+; CHECK-NEXT:    stfs f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testsubtof:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v2
+; CHECK-BE-NEXT:    stfs f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = uitofp i8 %vecext to float
+  store float %conv, float* %ptr, align 8
+  ret void
+}
+
+define dso_local void @testsbtof(<16 x i8> %a, float* nocapture %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: testsbtof:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractub v2, v2, 15
+; CHECK-NEXT:    vextsb2d v2, v2
+; CHECK-NEXT:    xscvsxdsp f0, v2
+; CHECK-NEXT:    stfs f0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testsbtof:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xscvsxdsp f0, v2
+; CHECK-BE-NEXT:    stfs f0, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sitofp i8 %vecext to float
+  store float %conv, float* %ptr, align 8
+  ret void
+}

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index e73fa39dea34..8f68b94076a6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -32,17 +32,11 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:    clrlwi r3, r3, 16
-; CHECK-P9-NEXT:    mtfprwz f0, r3
-; CHECK-P9-NEXT:    li r3, 2
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:    clrlwi r3, r3, 16
+; CHECK-P9-NEXT:    vextractuh v3, v2, 14
+; CHECK-P9-NEXT:    vextractuh v2, v2, 12
+; CHECK-P9-NEXT:    xscvuxdsp f0, v3
 ; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    mtfprwz f0, r3
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f0, v2
 ; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
@@ -51,17 +45,11 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    li r3, 2
-; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
-; CHECK-BE-NEXT:    clrlwi r3, r3, 16
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
-; CHECK-BE-NEXT:    clrlwi r3, r3, 16
+; CHECK-BE-NEXT:    vextractuh v3, v2, 2
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v3
 ; CHECK-BE-NEXT:    xscvdpspn v3, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v2
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
 ; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
@@ -251,17 +239,13 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    mtfprwa f0, r3
-; CHECK-P9-NEXT:    li r3, 2
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:    extsh r3, r3
+; CHECK-P9-NEXT:    vextractuh v3, v2, 14
+; CHECK-P9-NEXT:    vextractuh v2, v2, 12
+; CHECK-P9-NEXT:    vextsh2d v3, v3
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    xscvsxdsp f0, v3
 ; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    mtfprwa f0, r3
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f0, v2
 ; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
@@ -270,17 +254,13 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    li r3, 2
-; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
-; CHECK-BE-NEXT:    extsh r3, r3
-; CHECK-BE-NEXT:    mtfprwa f0, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
-; CHECK-BE-NEXT:    extsh r3, r3
+; CHECK-BE-NEXT:    vextractuh v3, v2, 2
+; CHECK-BE-NEXT:    vextractuh v2, v2, 0
+; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xscvsxdsp f0, v3
 ; CHECK-BE-NEXT:    xscvdpspn v3, f0
-; CHECK-BE-NEXT:    mtfprwa f0, r3
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f0, v2
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
 ; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index f9ab5e1f60bf..792565007b57 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -32,17 +32,11 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    clrlwi r3, r3, 24
-; CHECK-P9-NEXT:    mtfprwz f0, r3
-; CHECK-P9-NEXT:    li r3, 1
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    clrlwi r3, r3, 24
+; CHECK-P9-NEXT:    vextractub v3, v2, 15
+; CHECK-P9-NEXT:    vextractub v2, v2, 14
+; CHECK-P9-NEXT:    xscvuxdsp f0, v3
 ; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    mtfprwz f0, r3
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f0, v2
 ; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
@@ -51,17 +45,11 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    clrlwi r3, r3, 24
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    clrlwi r3, r3, 24
+; CHECK-BE-NEXT:    vextractub v3, v2, 1
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v3
 ; CHECK-BE-NEXT:    xscvdpspn v3, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f0, v2
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
 ; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
@@ -293,17 +281,13 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    mtfprwa f0, r3
-; CHECK-P9-NEXT:    li r3, 1
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    extsb r3, r3
+; CHECK-P9-NEXT:    vextractub v3, v2, 15
+; CHECK-P9-NEXT:    vextractub v2, v2, 14
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    xscvsxdsp f0, v3
 ; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    mtfprwa f0, r3
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f0, v2
 ; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
@@ -312,17 +296,13 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    extsb r3, r3
-; CHECK-BE-NEXT:    mtfprwa f0, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    extsb r3, r3
+; CHECK-BE-NEXT:    vextractub v3, v2, 1
+; CHECK-BE-NEXT:    vextractub v2, v2, 0
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xscvsxdsp f0, v3
 ; CHECK-BE-NEXT:    xscvdpspn v3, f0
-; CHECK-BE-NEXT:    mtfprwa f0, r3
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f0, v2
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
 ; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2


        


More information about the llvm-commits mailing list