[llvm-branch-commits] [llvm] 9bf0fea - [PowerPC] Add the hw sqrt test for vector type v4f32/v2f64
QingShan Zhang via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 2 19:24:07 PST 2020
Author: QingShan Zhang
Date: 2020-12-03T03:19:18Z
New Revision: 9bf0fea3729e3ad63da24f94ce22c6b4628bec15
URL: https://github.com/llvm/llvm-project/commit/9bf0fea3729e3ad63da24f94ce22c6b4628bec15
DIFF: https://github.com/llvm/llvm-project/commit/9bf0fea3729e3ad63da24f94ce22c6b4628bec15.diff
LOG: [PowerPC] Add the hw sqrt test for vector type v4f32/v2f64
The PowerPC ISA supports the square-root input test for the vector types v4f32 and v2f64.
Replacing the software compare with the hardware test improves performance.
Reviewed By: ChenZheng
Differential Revision: https://reviews.llvm.org/D90914
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/recipest.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f9f84aa668bc..101ef686c180 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12760,9 +12760,10 @@ static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
- // TODO - add support for v2f64/v4f32
+ // We only have VSX Vector Test for software Square Root.
EVT VT = Op.getValueType();
- if (VT != MVT::f64)
+ if (VT != MVT::f64 &&
+ ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
return SDValue();
SDLoc DL(Op);
@@ -12788,9 +12789,10 @@ SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
SDValue
PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
SelectionDAG &DAG) const {
- // TODO - add support for v2f64/v4f32
+ // We only have VSX Vector Square Root.
EVT VT = Op.getValueType();
- if (VT != MVT::f64)
+ if (VT != MVT::f64 &&
+ ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e778ca4be6b5..35a0abcfd632 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -640,10 +640,12 @@ let hasSideEffects = 0 in {
def XVTSQRTDP : XX2Form_1<60, 234,
(outs crrc:$crD), (ins vsrc:$XB),
- "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
+ "xvtsqrtdp $crD, $XB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>;
def XVTSQRTSP : XX2Form_1<60, 170,
(outs crrc:$crD), (ins vsrc:$XB),
- "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
+ "xvtsqrtsp $crD, $XB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>;
}
def XVDIVDP : XX3Form<60, 120,
@@ -2464,6 +2466,8 @@ def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
(XVNMADDASP $C, $A, $B)>;
def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>;
def : Pat<(v2f64 (bitconvert v4f32:$A)),
(COPY_TO_REGCLASS $A, VSRC)>;
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 3d9f2efc32e0..46da4cc6c471 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -953,24 +953,30 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
;
; CHECK-P8-LABEL: hoo3_fmf:
; CHECK-P8: # %bb.0:
+; CHECK-P8-NEXT: xvtsqrtsp 0, 34
+; CHECK-P8-NEXT: bc 12, 2, .LBB24_2
+; CHECK-P8-NEXT: # %bb.1:
; CHECK-P8-NEXT: xvrsqrtesp 0, 34
; CHECK-P8-NEXT: addis 3, 2, .LCPI24_0@toc@ha
; CHECK-P8-NEXT: addis 4, 2, .LCPI24_1@toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI24_0@toc@l
-; CHECK-P8-NEXT: lvx 3, 0, 3
-; CHECK-P8-NEXT: addi 3, 4, .LCPI24_1@toc@l
-; CHECK-P8-NEXT: lvx 4, 0, 3
; CHECK-P8-NEXT: xvmulsp 1, 34, 0
-; CHECK-P8-NEXT: xvmaddasp 35, 1, 0
-; CHECK-P8-NEXT: xvmulsp 0, 1, 36
-; CHECK-P8-NEXT: xxlxor 1, 1, 1
-; CHECK-P8-NEXT: xvcmpeqsp 2, 34, 1
-; CHECK-P8-NEXT: xvmulsp 0, 0, 35
-; CHECK-P8-NEXT: xxsel 34, 0, 1, 2
+; CHECK-P8-NEXT: lvx 2, 0, 3
+; CHECK-P8-NEXT: addi 3, 4, .LCPI24_1@toc@l
+; CHECK-P8-NEXT: lvx 3, 0, 3
+; CHECK-P8-NEXT: xvmaddasp 34, 1, 0
+; CHECK-P8-NEXT: xvmulsp 0, 1, 35
+; CHECK-P8-NEXT: xvmulsp 34, 0, 34
+; CHECK-P8-NEXT: blr
+; CHECK-P8-NEXT: .LBB24_2:
+; CHECK-P8-NEXT: xvsqrtsp 34, 34
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: hoo3_fmf:
; CHECK-P9: # %bb.0:
+; CHECK-P9-NEXT: xvtsqrtsp 0, 34
+; CHECK-P9-NEXT: bc 12, 2, .LBB24_2
+; CHECK-P9-NEXT: # %bb.1:
; CHECK-P9-NEXT: xvrsqrtesp 0, 34
; CHECK-P9-NEXT: addis 3, 2, .LCPI24_0@toc@ha
; CHECK-P9-NEXT: addi 3, 3, .LCPI24_0@toc@l
@@ -981,10 +987,10 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
; CHECK-P9-NEXT: xvmaddasp 2, 1, 0
; CHECK-P9-NEXT: lxvx 0, 0, 3
; CHECK-P9-NEXT: xvmulsp 0, 1, 0
-; CHECK-P9-NEXT: xxlxor 1, 1, 1
-; CHECK-P9-NEXT: xvmulsp 0, 0, 2
-; CHECK-P9-NEXT: xvcmpeqsp 2, 34, 1
-; CHECK-P9-NEXT: xxsel 34, 0, 1, 2
+; CHECK-P9-NEXT: xvmulsp 34, 0, 2
+; CHECK-P9-NEXT: blr
+; CHECK-P9-NEXT: .LBB24_2:
+; CHECK-P9-NEXT: xvsqrtsp 34, 34
; CHECK-P9-NEXT: blr
%r = call reassoc ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %r
@@ -1066,6 +1072,9 @@ define <2 x double> @hoo4_fmf(<2 x double> %a) #1 {
;
; CHECK-P8-LABEL: hoo4_fmf:
; CHECK-P8: # %bb.0:
+; CHECK-P8-NEXT: xvtsqrtdp 0, 34
+; CHECK-P8-NEXT: bc 12, 2, .LBB26_2
+; CHECK-P8-NEXT: # %bb.1:
; CHECK-P8-NEXT: xvrsqrtedp 0, 34
; CHECK-P8-NEXT: addis 3, 2, .LCPI26_0@toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI26_0@toc@l
@@ -1083,14 +1092,17 @@ define <2 x double> @hoo4_fmf(<2 x double> %a) #1 {
; CHECK-P8-NEXT: xvmuldp 2, 34, 0
; CHECK-P8-NEXT: xvmaddadp 1, 2, 0
; CHECK-P8-NEXT: xvmuldp 0, 2, 3
-; CHECK-P8-NEXT: xxlxor 2, 2, 2
-; CHECK-P8-NEXT: xvcmpeqdp 34, 34, 2
-; CHECK-P8-NEXT: xvmuldp 0, 0, 1
-; CHECK-P8-NEXT: xxsel 34, 0, 2, 34
+; CHECK-P8-NEXT: xvmuldp 34, 0, 1
+; CHECK-P8-NEXT: blr
+; CHECK-P8-NEXT: .LBB26_2:
+; CHECK-P8-NEXT: xvsqrtdp 34, 34
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: hoo4_fmf:
; CHECK-P9: # %bb.0:
+; CHECK-P9-NEXT: xvtsqrtdp 0, 34
+; CHECK-P9-NEXT: bc 12, 2, .LBB26_2
+; CHECK-P9-NEXT: # %bb.1:
; CHECK-P9-NEXT: xvrsqrtedp 0, 34
; CHECK-P9-NEXT: addis 3, 2, .LCPI26_0@toc@ha
; CHECK-P9-NEXT: addi 3, 3, .LCPI26_0@toc@l
@@ -1106,10 +1118,10 @@ define <2 x double> @hoo4_fmf(<2 x double> %a) #1 {
; CHECK-P9-NEXT: xvmuldp 3, 34, 0
; CHECK-P9-NEXT: xvmaddadp 2, 3, 0
; CHECK-P9-NEXT: xvmuldp 0, 3, 1
-; CHECK-P9-NEXT: xxlxor 1, 1, 1
-; CHECK-P9-NEXT: xvcmpeqdp 34, 34, 1
-; CHECK-P9-NEXT: xvmuldp 0, 0, 2
-; CHECK-P9-NEXT: xxsel 34, 0, 1, 34
+; CHECK-P9-NEXT: xvmuldp 34, 0, 2
+; CHECK-P9-NEXT: blr
+; CHECK-P9-NEXT: .LBB26_2:
+; CHECK-P9-NEXT: xvsqrtdp 34, 34
; CHECK-P9-NEXT: blr
%r = call reassoc ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
ret <2 x double> %r
More information about the llvm-branch-commits
mailing list