[llvm] 092619c - [PowerPC] Improve codegen for vector fp to int widening conversions

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 22 03:04:13 PDT 2021


Author: Nemanja Ivanovic
Date: 2021-04-22T05:04:06-05:00
New Revision: 092619cf6b8d33e8830221925c1174f5d373f1d2

URL: https://github.com/llvm/llvm-project/commit/092619cf6b8d33e8830221925c1174f5d373f1d2
DIFF: https://github.com/llvm/llvm-project/commit/092619cf6b8d33e8830221925c1174f5d373f1d2.diff

LOG: [PowerPC] Improve codegen for vector fp to int widening conversions

We currently do not utilize the instructions that convert
single-precision vectors to doubleword integer vectors
(xvcvspsxds/xvcvspuxds). These conversions come up in code
occasionally, and this improvement allows us to open-code some
functions that need to be added to altivec.h.

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/build-vector-tests.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e57f299dd895..0347a191856d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2899,6 +2899,22 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
                                (f64 (fpextend (extractelt v4f32:$B, 3))))),
           (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
                                     (XXPERMDI $A, $B, 3), 1)))>;
+def : Pat<(v2i64 (fp_to_sint
+                   (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+                                 (f64 (fpextend (extractelt v4f32:$A, 2)))))),
+          (v2i64 (XVCVSPSXDS $A))>;
+def : Pat<(v2i64 (fp_to_uint
+                   (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+                                 (f64 (fpextend (extractelt v4f32:$A, 2)))))),
+          (v2i64 (XVCVSPUXDS $A))>;
+def : Pat<(v2i64 (fp_to_sint
+                   (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+                                 (f64 (fpextend (extractelt v4f32:$A, 3)))))),
+          (v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
+def : Pat<(v2i64 (fp_to_uint
+                   (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+                                 (f64 (fpextend (extractelt v4f32:$A, 3)))))),
+          (v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
 def : Pat<WToDPExtractConv.BV02S,
           (v2f64 (XVCVSXWDP $A))>;
 def : Pat<WToDPExtractConv.BV13S,
@@ -3008,6 +3024,22 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
                                (f64 (fpextend (extractelt v4f32:$B, 3))))),
           (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
+def : Pat<(v2i64 (fp_to_sint
+                   (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+                                 (f64 (fpextend (extractelt v4f32:$A, 3)))))),
+          (v2i64 (XVCVSPSXDS $A))>;
+def : Pat<(v2i64 (fp_to_uint
+                   (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+                                 (f64 (fpextend (extractelt v4f32:$A, 3)))))),
+          (v2i64 (XVCVSPUXDS $A))>;
+def : Pat<(v2i64 (fp_to_sint
+                   (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+                                 (f64 (fpextend (extractelt v4f32:$A, 2)))))),
+          (v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
+def : Pat<(v2i64 (fp_to_uint
+                   (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+                                 (f64 (fpextend (extractelt v4f32:$A, 2)))))),
+          (v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
 def : Pat<WToDPExtractConv.BV02S,
           (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
 def : Pat<WToDPExtractConv.BV13S,

diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index ecc75307a596..b1820494a039 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -6532,3 +6532,131 @@ entry:
   %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
   ret <2 x double> %vecinit3
 }
+
+define dso_local <2 x i64> @test_xvcvspsxds13(<4 x float> %a) local_unnamed_addr {
+; P9BE-LABEL: test_xvcvspsxds13:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; P9BE-NEXT:    xvcvspsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: test_xvcvspsxds13:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xvcvspsxds v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: test_xvcvspsxds13:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; P8BE-NEXT:    xvcvspsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: test_xvcvspsxds13:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xvcvspsxds v2, v2
+; P8LE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x float> %a, i32 1
+  %conv = fptosi float %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 3
+  %conv2 = fptosi float %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define dso_local <2 x i64> @test_xvcvspuxds13(<4 x float> %a) local_unnamed_addr {
+; P9BE-LABEL: test_xvcvspuxds13:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; P9BE-NEXT:    xvcvspuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: test_xvcvspuxds13:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xvcvspuxds v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: test_xvcvspuxds13:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; P8BE-NEXT:    xvcvspuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: test_xvcvspuxds13:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xvcvspuxds v2, v2
+; P8LE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x float> %a, i32 1
+  %conv = fptoui float %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 3
+  %conv2 = fptoui float %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define dso_local <2 x i64> @test_xvcvspsxds02(<4 x float> %a) local_unnamed_addr {
+; P9BE-LABEL: test_xvcvspsxds02:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xvcvspsxds v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: test_xvcvspsxds02:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxsldwi vs0, v2, v2, 1
+; P9LE-NEXT:    xvcvspsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: test_xvcvspsxds02:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xvcvspsxds v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: test_xvcvspsxds02:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xxsldwi vs0, v2, v2, 1
+; P8LE-NEXT:    xvcvspsxds v2, vs0
+; P8LE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x float> %a, i32 0
+  %conv = fptosi float %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 2
+  %conv2 = fptosi float %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define dso_local <2 x i64> @test_xvcvspuxds02(<4 x float> %a) local_unnamed_addr {
+; P9BE-LABEL: test_xvcvspuxds02:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xvcvspuxds v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: test_xvcvspuxds02:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxsldwi vs0, v2, v2, 1
+; P9LE-NEXT:    xvcvspuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: test_xvcvspuxds02:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xvcvspuxds v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: test_xvcvspuxds02:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xxsldwi vs0, v2, v2, 1
+; P8LE-NEXT:    xvcvspuxds v2, vs0
+; P8LE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x float> %a, i32 0
+  %conv = fptoui float %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 2
+  %conv2 = fptoui float %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}


        


More information about the llvm-commits mailing list