[llvm] r336316 - [Power9] Optimize codegen for conversions of int to float128
Lei Huang via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 5 00:46:01 PDT 2018
Author: lei
Date: Thu Jul 5 00:46:01 2018
New Revision: 336316
URL: http://llvm.org/viewvc/llvm-project?rev=336316&view=rev
Log:
[Power9] Optimize codegen for conversions of int to float128
Optimize code sequences for integer conversion to fp128 when the integer is a result of:
* float->int
* float->long
* double->int
* double->long
Differential Revision: https://reviews.llvm.org/D48429
Modified:
llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll
llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=336316&r1=336315&r2=336316&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Thu Jul 5 00:46:01 2018
@@ -2548,9 +2548,16 @@ let AddedComplexity = 400, Predicates =
def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
def : Pat<(f128 (sint_to_fp i64:$src)),
(f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVSDQP $src))>;
+ def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
+
def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
def : Pat<(f128 (uint_to_fp i64:$src)),
(f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP $src))>;
// Convert (Un)Signed Word -> QP.
def : Pat<(f128 (sint_to_fp i32:$src)),
@@ -3220,6 +3227,11 @@ let AddedComplexity = 400, Predicates =
(f128 (XSCVUDQP
(EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
}
+
+ // Unsigned int in vsx register -> QP
+ def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
} // IsBigEndian, HasP9Vector
let Predicates = [IsLittleEndian, HasP9Vector] in {
@@ -3286,6 +3298,11 @@ let AddedComplexity = 400, Predicates =
(EXTRACT_SUBREG
(VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
}
+
+ // Unsigned int in vsx register -> QP
+ def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
} // IsLittleEndian, HasP9Vector
// Convert (Un)Signed DWord in memory -> QP
Modified: llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll?rev=336316&r1=336315&r2=336316&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll Thu Jul 5 00:46:01 2018
@@ -714,3 +714,129 @@ entry:
store fp128 %conv, fp128* %res, align 16
ret void
}
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; Function Attrs: norecurse nounwind
+define void @cvdp2sw2qp(double %val, fp128* nocapture %res) {
+; CHECK-LABEL: cvdp2sw2qp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpsxws 2, 1
+; CHECK-NEXT: vextsw2d 2, 2
+; CHECK-NEXT: xscvsdqp 2, 2
+; CHECK-NEXT: stxv 2, 0(4)
+; CHECK-NEXT: blr
+entry:
+ %conv = fptosi double %val to i32
+ %conv1 = sitofp i32 %conv to fp128
+ store fp128 %conv1, fp128* %res, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @cvdp2sdw2qp(double %val, fp128* nocapture %res) {
+; CHECK-LABEL: cvdp2sdw2qp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpsxds 2, 1
+; CHECK-NEXT: xscvsdqp 2, 2
+; CHECK-NEXT: stxv 2, 0(4)
+; CHECK-NEXT: blr
+entry:
+ %conv = fptosi double %val to i64
+ %conv1 = sitofp i64 %conv to fp128
+ store fp128 %conv1, fp128* %res, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @cvsp2sw2qp(float %val, fp128* nocapture %res) {
+; CHECK-LABEL: cvsp2sw2qp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpsxws 2, 1
+; CHECK-NEXT: vextsw2d 2, 2
+; CHECK-NEXT: xscvsdqp 2, 2
+; CHECK-NEXT: stxv 2, 0(4)
+; CHECK-NEXT: blr
+entry:
+ %conv = fptosi float %val to i32
+ %conv1 = sitofp i32 %conv to fp128
+ store fp128 %conv1, fp128* %res, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @cvsp2sdw2qp(float %val, fp128* nocapture %res) {
+; CHECK-LABEL: cvsp2sdw2qp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpsxds 2, 1
+; CHECK-NEXT: xscvsdqp 2, 2
+; CHECK-NEXT: stxv 2, 0(4)
+; CHECK-NEXT: blr
+entry:
+ %conv = fptosi float %val to i64
+ %conv1 = sitofp i64 %conv to fp128
+ store fp128 %conv1, fp128* %res, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @cvdp2uw2qp(double %val, fp128* nocapture %res) {
+; CHECK-LABEL: cvdp2uw2qp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpuxws 0, 1
+; CHECK-NEXT: xxextractuw 2, 0, 8
+; CHECK-NEXT: xscvudqp 2, 2
+; CHECK-NEXT: stxv 2, 0(4)
+; CHECK-NEXT: blr
+entry:
+ %conv = fptoui double %val to i32
+ %conv1 = uitofp i32 %conv to fp128
+ store fp128 %conv1, fp128* %res, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @cvdp2udw2qp(double %val, fp128* nocapture %res) {
+; CHECK-LABEL: cvdp2udw2qp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpuxds 2, 1
+; CHECK-NEXT: xscvudqp 2, 2
+; CHECK-NEXT: stxv 2, 0(4)
+; CHECK-NEXT: blr
+entry:
+ %conv = fptoui double %val to i64
+ %conv1 = uitofp i64 %conv to fp128
+ store fp128 %conv1, fp128* %res, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @cvsp2uw2qp(float %val, fp128* nocapture %res) {
+; CHECK-LABEL: cvsp2uw2qp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpuxws 0, 1
+; CHECK-NEXT: xxextractuw 2, 0, 8
+; CHECK-NEXT: xscvudqp 2, 2
+; CHECK-NEXT: stxv 2, 0(4)
+; CHECK-NEXT: blr
+entry:
+ %conv = fptoui float %val to i32
+ %conv1 = uitofp i32 %conv to fp128
+ store fp128 %conv1, fp128* %res, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @cvsp2udw2qp(float %val, fp128* nocapture %res) {
+; CHECK-LABEL: cvsp2udw2qp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpuxds 2, 1
+; CHECK-NEXT: xscvudqp 2, 2
+; CHECK-NEXT: stxv 2, 0(4)
+; CHECK-NEXT: blr
+entry:
+ %conv = fptoui float %val to i64
+ %conv1 = uitofp i64 %conv to fp128
+ store fp128 %conv1, fp128* %res, align 16
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll?rev=336316&r1=336315&r2=336316&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll Thu Jul 5 00:46:01 2018
@@ -218,12 +218,12 @@ entry:
define void @mixParam_03(fp128 %f1, double* nocapture %d1, <4 x i32> %vec1,
; CHECK-LABEL: mixParam_03:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ld 3, 104(1)
-; CHECK-NEXT: mtvsrwa [[REG2:[0-9]+]], 10
-; CHECK-NEXT: stxv [[REG0:[0-9]+]], 0(9)
-; CHECK-NEXT: stxvx [[REG1:[0-9]+]], 0, 3
-; CHECK-NEXT: lxv [[REG0]], 0(9)
-; CHECK-NEXT: xscvsdqp [[REG1]], [[REG2]]
+; CHECK-DAG: ld 3, 104(1)
+; CHECK-DAG: mtvsrwa [[REG2:[0-9]+]], 10
+; CHECK-DAG: stxv 2, 0(9)
+; CHECK-DAG: xscvsdqp [[REG1:[0-9]+]], [[REG2]]
+; CHECK: stxvx 3, 0, 3
+; CHECK-NEXT: lxv 2, 0(9)
; CHECK-NEXT: xsaddqp 2, 2, [[REG1]]
; CHECK-NEXT: xscvqpdp 2, 2
; CHECK-NEXT: stxsd 2, 0(5)
@@ -249,10 +249,10 @@ define fastcc void @mixParam_03f(fp128 %
; CHECK-NEXT: stxv [[REG1:[0-9]+]], 0(4)
; CHECK-NEXT: stxv [[REG2:[0-9]+]], 0(7)
; CHECK-NEXT: lxv [[REG1]], 0(4)
-; CHECK-NEXT: xscvsdqp [[REG2]], [[REG0]]
-; CHECK-NEXT: xsaddqp [[REG1]], [[REG1]], [[REG2]]
-; CHECK-NEXT: xscvqpdp [[REG1]], [[REG1]]
-; CHECK-NEXT: stxsd [[REG1]], 0(3)
+; CHECK-NEXT: xscvsdqp [[REG3:[0-9]+]], [[REG0]]
+; CHECK-NEXT: xsaddqp [[REG4:[0-9]+]], [[REG1]], [[REG3]]
+; CHECK-NEXT: xscvqpdp 2, [[REG4]]
+; CHECK-NEXT: stxsd 2, 0(3)
; CHECK-NEXT: blr
fp128* nocapture %f2, i32 signext %i1, i8 zeroext %c1,
<4 x i32>* nocapture %vec2) {
More information about the llvm-commits
mailing list