[llvm] r225248 - [PowerPC] Improve int_to_fp(fp_to_int(x)) combining
Hal Finkel
hfinkel at anl.gov
Mon Jan 5 22:01:58 PST 2015
Author: hfinkel
Date: Tue Jan 6 00:01:57 2015
New Revision: 225248
URL: http://llvm.org/viewvc/llvm-project?rev=225248&view=rev
Log:
[PowerPC] Improve int_to_fp(fp_to_int(x)) combining
The old target DAG combine that allowed for performing int_to_fp(fp_to_int(x))
without a load/store pair is updated here with support for unsigned integers,
and to support single-precision values without a third rounding step, on newer
cores with the appropriate instructions.
Added:
llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=225248&r1=225247&r2=225248&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Jan 6 00:01:57 2015
@@ -631,6 +631,8 @@ PPCTargetLowering::PPCTargetLowering(con
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
+ if (Subtarget.hasFPCVT())
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
@@ -8349,6 +8351,75 @@ SDValue PPCTargetLowering::DAGCombineExt
N->getOperand(0), ShiftCst), ShiftCst);
}
+SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::UINT_TO_FP) &&
+ "Need an int -> FP conversion node here");
+
+ if (!Subtarget.has64BitSupport())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Op(N, 0);
+
+ // Don't handle ppc_fp128 here or i1 conversions.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return SDValue();
+
+ // For i32 intermediate values, unfortunately, the conversion functions
+ // leave the upper 32 bits of the value undefined. Within the set of
+ // scalar instructions, we have no method for zero- or sign-extending the
+ // value. Thus, we cannot handle i32 intermediate values here.
+ if (Op.getOperand(0).getValueType() == MVT::i32)
+ return SDValue();
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
+ // If we're converting from a float, to an int, and back to a float again,
+ // then we don't need the store/load pair at all.
+ if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
+ Subtarget.hasFPCVT()) ||
+ (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
+ SDValue Src = Op.getOperand(0).getOperand(0);
+ if (Src.getValueType() == MVT::f32) {
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+ DCI.AddToWorklist(Src.getNode());
+ }
+
+ unsigned FCTOp =
+ Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ;
+
+ SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
+
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(FP.getNode());
+ }
+
+ return FP;
+ }
+
+ return SDValue();
+}
+
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
@@ -8483,36 +8554,8 @@ SDValue PPCTargetLowering::PerformDAGCom
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
- if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
- // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
- // We allow the src/dst to be either f32/f64, but the intermediate
- // type must be i64.
- if (N->getOperand(0).getValueType() == MVT::i64 &&
- N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
- SDValue Val = N->getOperand(0).getOperand(0);
- if (Val.getValueType() == MVT::f32) {
- Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- }
-
- Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- if (N->getValueType(0) == MVT::f32) {
- Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
- DAG.getIntPtrConstant(0));
- DCI.AddToWorklist(Val.getNode());
- }
- return Val;
- } else if (N->getOperand(0).getValueType() == MVT::i32) {
- // If the intermediate type is i32, we can avoid the load/store here
- // too.
- }
- }
- }
- break;
+ case ISD::UINT_TO_FP:
+ return combineFPToIntToFP(N, DCI);
case ISD::STORE: {
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=225248&r1=225247&r2=225248&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Jan 6 00:01:57 2015
@@ -748,6 +748,7 @@ namespace llvm {
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
Added: llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll?rev=225248&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll Tue Jan 6 00:01:57 2015
@@ -0,0 +1,70 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=FPCVT
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=PPC64
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define float @fool(float %X) #0 {
+entry:
+ %conv = fptosi float %X to i64
+ %conv1 = sitofp i64 %conv to float
+ ret float %conv1
+
+; FPCVT-LABEL: @fool
+; FPCVT: fctidz [[REG1:[0-9]+]], 1
+; FPCVT: fcfids 1, [[REG1]]
+; FPCVT: blr
+
+; PPC64-LABEL: @fool
+; PPC64: fctidz [[REG1:[0-9]+]], 1
+; PPC64: fcfid [[REG2:[0-9]+]], [[REG1]]
+; PPC64: frsp 1, [[REG2]]
+; PPC64: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @foodl(double %X) #0 {
+entry:
+ %conv = fptosi double %X to i64
+ %conv1 = sitofp i64 %conv to double
+ ret double %conv1
+
+; FPCVT-LABEL: @foodl
+; FPCVT: fctidz [[REG1:[0-9]+]], 1
+; FPCVT: fcfid 1, [[REG1]]
+; FPCVT: blr
+
+; PPC64-LABEL: @foodl
+; PPC64: fctidz [[REG1:[0-9]+]], 1
+; PPC64: fcfid 1, [[REG1]]
+; PPC64: blr
+}
+
+; Function Attrs: nounwind readnone
+define float @fooul(float %X) #0 {
+entry:
+ %conv = fptoui float %X to i64
+ %conv1 = uitofp i64 %conv to float
+ ret float %conv1
+
+; FPCVT-LABEL: @fooul
+; FPCVT: fctiduz [[REG1:[0-9]+]], 1
+; FPCVT: fcfidus 1, [[REG1]]
+; FPCVT: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @fooudl(double %X) #0 {
+entry:
+ %conv = fptoui double %X to i64
+ %conv1 = uitofp i64 %conv to double
+ ret double %conv1
+
+; FPCVT-LABEL: @fooudl
+; FPCVT: fctiduz [[REG1:[0-9]+]], 1
+; FPCVT: fcfidu 1, [[REG1]]
+; FPCVT: blr
+}
+
+attributes #0 = { nounwind readnone }
+
More information about the llvm-commits
mailing list