[llvm] r275796 - [PowerPC] Remove redundant direct moves when extracting integers and converting to FP
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 18 08:30:00 PDT 2016
Author: nemanjai
Date: Mon Jul 18 10:30:00 2016
New Revision: 275796
URL: http://llvm.org/viewvc/llvm-project?rev=275796&view=rev
Log:
[PowerPC] Remove redundant direct moves when extracting integers and converting to FP
This patch corresponds to review:
https://reviews.llvm.org/D21354
We use direct moves for extracting integer elements from vectors. We also use
direct moves when converting integers to FP. When these operations are chained,
we get a direct move out of a VSR followed by a direct move back into a VSR.
Both moves are redundant - all we need to do is line up the element within the
VSR (swapping the doublewords when the requested element is not already in the
position the scalar conversion reads) and convert it in place.
Added:
llvm/trunk/test/CodeGen/PowerPC/remove-redundant-moves.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=275796&r1=275795&r2=275796&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Mon Jul 18 10:30:00 2016
@@ -1029,6 +1029,28 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:
def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
(XVRSQRTEDP $A)>;
+let Predicates = [IsLittleEndian] in {
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+} // IsLittleEndian
+
+let Predicates = [IsBigEndian] in {
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+} // IsBigEndian
+
} // AddedComplexity
} // HasVSX
@@ -1235,6 +1257,27 @@ let AddedComplexity = 400 in { // Prefer
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
+ let Predicates = [IsLittleEndian] in {
+ def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ }
+
+ let Predicates = [IsBigEndian] in {
+ def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
+ def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ }
} // AddedComplexity = 400
} // HasP8Vector
Added: llvm/trunk/test/CodeGen/PowerPC/remove-redundant-moves.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/remove-redundant-moves.ll?rev=275796&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/remove-redundant-moves.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/remove-redundant-moves.ll Mon Jul 18 10:30:00 2016
@@ -0,0 +1,107 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+define double @test1(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvsxddp 1, [[SW]]
+; CHECK-BE-LABEL: test1
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvsxddp 1, [[CP]]
+ %0 = extractelement <2 x i64> %a, i32 0
+ %1 = sitofp i64 %0 to double
+ ret double %1
+}
+
+define double @test2(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvsxddp 1, [[CP]]
+; CHECK-BE-LABEL: test2
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvsxddp 1, [[SW]]
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = sitofp i64 %0 to double
+ ret double %1
+}
+
+define float @test1f(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1f
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvsxdsp 1, [[SW]]
+; CHECK-BE-LABEL: test1f
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvsxdsp 1, [[CP]]
+ %0 = extractelement <2 x i64> %a, i32 0
+ %1 = sitofp i64 %0 to float
+ ret float %1
+}
+
+define float @test2f(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2f
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvsxdsp 1, [[CP]]
+; CHECK-BE-LABEL: test2f
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvsxdsp 1, [[SW]]
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = sitofp i64 %0 to float
+ ret float %1
+}
+
+define double @test1u(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1u
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: test1u
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvuxddp 1, [[CP]]
+ %0 = extractelement <2 x i64> %a, i32 0
+ %1 = uitofp i64 %0 to double
+ ret double %1
+}
+
+define double @test2u(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2u
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvuxddp 1, [[CP]]
+; CHECK-BE-LABEL: test2u
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvuxddp 1, [[SW]]
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = uitofp i64 %0 to double
+ ret double %1
+}
+
+define float @test1fu(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1fu
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvuxdsp 1, [[SW]]
+; CHECK-BE-LABEL: test1fu
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvuxdsp 1, [[CP]]
+ %0 = extractelement <2 x i64> %a, i32 0
+ %1 = uitofp i64 %0 to float
+ ret float %1
+}
+
+define float @test2fu(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2fu
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvuxdsp 1, [[CP]]
+; CHECK-BE-LABEL: test2fu
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvuxdsp 1, [[SW]]
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = uitofp i64 %0 to float
+ ret float %1
+}
More information about the llvm-commits mailing list