[llvm] 092619c - [PowerPC] Improve codegen for vector fp to int widening conversions
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 22 03:04:13 PDT 2021
Author: Nemanja Ivanovic
Date: 2021-04-22T05:04:06-05:00
New Revision: 092619cf6b8d33e8830221925c1174f5d373f1d2
URL: https://github.com/llvm/llvm-project/commit/092619cf6b8d33e8830221925c1174f5d373f1d2
DIFF: https://github.com/llvm/llvm-project/commit/092619cf6b8d33e8830221925c1174f5d373f1d2.diff
LOG: [PowerPC] Improve codegen for vector fp to int widening conversions
We currently do not use the instructions that convert single-precision
vectors to doubleword integer vectors. These conversions come up in
code occasionally, and this improvement allows us to open-code some
functions that need to be added to altivec.h.
Added: (none)
Modified:
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/build-vector-tests.ll
Removed: (none)
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e57f299dd895..0347a191856d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2899,6 +2899,22 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
(f64 (fpextend (extractelt v4f32:$B, 3))))),
(v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
(XXPERMDI $A, $B, 3), 1)))>;
+def : Pat<(v2i64 (fp_to_sint
+ (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2)))))),
+ (v2i64 (XVCVSPSXDS $A))>;
+def : Pat<(v2i64 (fp_to_uint
+ (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2)))))),
+ (v2i64 (XVCVSPUXDS $A))>;
+def : Pat<(v2i64 (fp_to_sint
+ (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3)))))),
+ (v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
+def : Pat<(v2i64 (fp_to_uint
+ (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3)))))),
+ (v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP $A))>;
def : Pat<WToDPExtractConv.BV13S,
@@ -3008,6 +3024,22 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
(f64 (fpextend (extractelt v4f32:$B, 3))))),
(v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
+def : Pat<(v2i64 (fp_to_sint
+ (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3)))))),
+ (v2i64 (XVCVSPSXDS $A))>;
+def : Pat<(v2i64 (fp_to_uint
+ (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3)))))),
+ (v2i64 (XVCVSPUXDS $A))>;
+def : Pat<(v2i64 (fp_to_sint
+ (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2)))))),
+ (v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
+def : Pat<(v2i64 (fp_to_uint
+ (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2)))))),
+ (v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV13S,
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index ecc75307a596..b1820494a039 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -6532,3 +6532,131 @@ entry:
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
ret <2 x double> %vecinit3
}
+
+define dso_local <2 x i64> @test_xvcvspsxds13(<4 x float> %a) local_unnamed_addr {
+; P9BE-LABEL: test_xvcvspsxds13:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9BE-NEXT: xvcvspsxds v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: test_xvcvspsxds13:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xvcvspsxds v2, v2
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: test_xvcvspsxds13:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8BE-NEXT: xvcvspsxds v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: test_xvcvspsxds13:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xvcvspsxds v2, v2
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 1
+ %conv = fptosi float %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 3
+ %conv2 = fptosi float %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define dso_local <2 x i64> @test_xvcvspuxds13(<4 x float> %a) local_unnamed_addr {
+; P9BE-LABEL: test_xvcvspuxds13:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9BE-NEXT: xvcvspuxds v2, vs0
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: test_xvcvspuxds13:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xvcvspuxds v2, v2
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: test_xvcvspuxds13:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8BE-NEXT: xvcvspuxds v2, vs0
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: test_xvcvspuxds13:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xvcvspuxds v2, v2
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 1
+ %conv = fptoui float %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 3
+ %conv2 = fptoui float %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define dso_local <2 x i64> @test_xvcvspsxds02(<4 x float> %a) local_unnamed_addr {
+; P9BE-LABEL: test_xvcvspsxds02:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xvcvspsxds v2, v2
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: test_xvcvspsxds02:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9LE-NEXT: xvcvspsxds v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: test_xvcvspsxds02:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xvcvspsxds v2, v2
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: test_xvcvspsxds02:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8LE-NEXT: xvcvspsxds v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 0
+ %conv = fptosi float %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 2
+ %conv2 = fptosi float %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define dso_local <2 x i64> @test_xvcvspuxds02(<4 x float> %a) local_unnamed_addr {
+; P9BE-LABEL: test_xvcvspuxds02:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: xvcvspuxds v2, v2
+; P9BE-NEXT: blr
+;
+; P9LE-LABEL: test_xvcvspuxds02:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P9LE-NEXT: xvcvspuxds v2, vs0
+; P9LE-NEXT: blr
+;
+; P8BE-LABEL: test_xvcvspuxds02:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: xvcvspuxds v2, v2
+; P8BE-NEXT: blr
+;
+; P8LE-LABEL: test_xvcvspuxds02:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
+; P8LE-NEXT: xvcvspuxds v2, vs0
+; P8LE-NEXT: blr
+entry:
+ %vecext = extractelement <4 x float> %a, i32 0
+ %conv = fptoui float %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x float> %a, i32 2
+ %conv2 = fptoui float %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
More information about the llvm-commits mailing list