[PATCH] D77895: [x86] use vector instructions to lower FP->int->FP casts
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 12 07:28:46 PDT 2020
This revision was automatically updated to reflect the committed changes.
spatel marked an inline comment as done.
Closed by commit rGd04db4825a4d: [x86] use vector instructions to lower FP->int->FP casts (authored by spatel).
Herald added a project: LLVM.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D77895/new/
https://reviews.llvm.org/D77895
Files:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/ftrunc.ll
Index: llvm/test/CodeGen/X86/ftrunc.ll
===================================================================
--- llvm/test/CodeGen/X86/ftrunc.ll
+++ llvm/test/CodeGen/X86/ftrunc.ll
@@ -223,15 +223,14 @@
define float @trunc_signed_f32_no_fast_math(float %x) {
; SSE-LABEL: trunc_signed_f32_no_fast_math:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: xorps %xmm0, %xmm0
-; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_signed_f32_no_fast_math:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcvttss2si %xmm0, %eax
-; AVX1-NEXT: vcvtsi2ss %eax, %xmm1, %xmm0
+; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptosi float %x to i32
%r = sitofp i32 %i to float
@@ -241,9 +240,8 @@
define float @trunc_signed_f32(float %x) #0 {
; SSE2-LABEL: trunc_signed_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cvttss2si %xmm0, %eax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %eax, %xmm0
+; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed_f32:
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19141,6 +19141,45 @@
DAG.getIntPtrConstant(0, DL));
}
+/// Given a scalar cast to FP whose operand is a cast to integer (almost an
+/// ftrunc), try to vectorize both cast ops to avoid an expensive round-trip
+/// between XMM and GPR. Returns an empty SDValue when not applicable.
+static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // TODO: Allow FP_TO_UINT.
+ SDValue CastToInt = CastToFP.getOperand(0);
+ MVT VT = CastToFP.getSimpleValueType();
+ if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
+ return SDValue();
+
+ MVT IntVT = CastToInt.getSimpleValueType();
+ SDValue X = CastToInt.getOperand(0);
+ // TODO: Allow size-changing from source to dest (double -> i32 -> float)
+ if (X.getSimpleValueType() != VT ||
+ VT.getSizeInBits() != IntVT.getSizeInBits())
+ return SDValue();
+
+ // Bail out unless a 128-bit vector cast op exists for this type of cast.
+ unsigned NumEltsInXMM = 128 / VT.getScalarSizeInBits();
+ MVT Vec128VT = MVT::getVectorVT(VT, NumEltsInXMM);
+ MVT Int128VT = MVT::getVectorVT(IntVT, NumEltsInXMM);
+ if (!useVectorCast(CastToFP.getOpcode(), Int128VT, Vec128VT, Subtarget))
+ return SDValue();
+
+ // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
+ //
+ // The high elements are intentionally left undefined (rather than, for
+ // example, zeroed) because defining them could nullify any performance
+ // advantage that we hoped to gain from this vector op hack. We do not expect
+ // any adverse effects (like denorm penalties) with cast ops.
+ SDLoc DL(CastToFP);
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
+ SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, Vec128VT, X);
+ SDValue VCastToInt = DAG.getNode(ISD::FP_TO_SINT, DL, Int128VT, VecX);
+ SDValue VCastToFP = DAG.getNode(ISD::SINT_TO_FP, DL, Vec128VT, VCastToInt);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
+}
+
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
@@ -19243,6 +19282,9 @@
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
+ if (SDValue R = lowerFPToIntToFP(Op, DAG, Subtarget))
+ return R;
+
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
// Note: Since v2f64 is a legal type. We don't need to zero extend the
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D77895.256854.patch
Type: text/x-patch
Size: 3901 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200412/e0bc5667/attachment.bin>
More information about the llvm-commits
mailing list