[llvm] cceb630 - [x86] use vector instructions to lower more FP->int->FP casts
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 19 05:33:24 PDT 2020
Author: Sanjay Patel
Date: 2020-04-19T08:33:17-04:00
New Revision: cceb630a07cc0852f81cb2a44aef8f248504bd0f
URL: https://github.com/llvm/llvm-project/commit/cceb630a07cc0852f81cb2a44aef8f248504bd0f
DIFF: https://github.com/llvm/llvm-project/commit/cceb630a07cc0852f81cb2a44aef8f248504bd0f.diff
LOG: [x86] use vector instructions to lower more FP->int->FP casts
This is an enhancement to D77895 to avoid another
round-trip from XMM->GPR->XMM. This time we handle
the case of starting/ending with an f64 and casting
to signed i32 as the intermediate value.
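It's a bit more involved than I initially assumed
because the v2f64 -> v4i32 -> v2f64 casts change the element count,
so we need the target-specific opcodes (X86ISD::CVTTP2SI and
X86ISD::CVTSI2P) instead of the generic ISD::FP_TO_SINT and
ISD::SINT_TO_FP nodes to represent these non-standard cast ops.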
Differential Revision: https://reviews.llvm.org/D78362
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/ftrunc.ll
llvm/test/CodeGen/X86/isint.ll
llvm/test/CodeGen/X86/setoeq.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1ae8a8be5185..375e21a0d40f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19178,17 +19178,25 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
MVT IntVT = CastToInt.getSimpleValueType();
SDValue X = CastToInt.getOperand(0);
// TODO: Allow size-changing from source to dest (double -> i32 -> float)
- if (X.getSimpleValueType() != VT ||
- VT.getSizeInBits() != IntVT.getSizeInBits())
+ if (X.getSimpleValueType() != VT)
return SDValue();
- // See if we have a 128-bit vector cast op for this type of cast.
- unsigned NumEltsInXMM = 128 / VT.getScalarSizeInBits();
- MVT VecFPVT = MVT::getVectorVT(VT, NumEltsInXMM);
- MVT VecIntVT = MVT::getVectorVT(IntVT, NumEltsInXMM);
- if (!useVectorCast(CastToFP.getOpcode(), VecIntVT, VecFPVT, Subtarget))
+ // See if we have 128-bit vector cast instructions for this type of cast.
+ // We need cvttps2dq + cvtdq2ps or cvttpd2dq + cvtdq2pd.
+ if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
+ IntVT != MVT::i32)
return SDValue();
+ unsigned NumFPEltsInXMM = 128 / VT.getScalarSizeInBits();
+ unsigned NumIntEltsInXMM = 128 / IntVT.getScalarSizeInBits();
+ MVT VecFPVT = MVT::getVectorVT(VT, NumFPEltsInXMM);
+ MVT VecIntVT = MVT::getVectorVT(IntVT, NumIntEltsInXMM);
+
+ // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
+ bool NeedX86Opcodes = VT.getSizeInBits() != IntVT.getSizeInBits();
+ unsigned ToIntOpcode = NeedX86Opcodes ? X86ISD::CVTTP2SI : ISD::FP_TO_SINT;
+ unsigned ToFPOpcode = NeedX86Opcodes ? X86ISD::CVTSI2P : ISD::SINT_TO_FP;
+
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
//
// We are not defining the high elements (for example, zero them) because
@@ -19198,8 +19206,8 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
SDLoc DL(CastToFP);
SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecFPVT, X);
- SDValue VCastToInt = DAG.getNode(ISD::FP_TO_SINT, DL, VecIntVT, VecX);
- SDValue VCastToFP = DAG.getNode(ISD::SINT_TO_FP, DL, VecFPVT, VCastToInt);
+ SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
+ SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecFPVT, VCastToInt);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
}
diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll
index abadcaa5cf6c..fefb094f1a16 100644
--- a/llvm/test/CodeGen/X86/ftrunc.ll
+++ b/llvm/test/CodeGen/X86/ftrunc.ll
@@ -263,15 +263,14 @@ define float @trunc_signed_f32_nsz(float %x) #0 {
define double @trunc_signed32_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: xorps %xmm0, %xmm0
-; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcvttsd2si %xmm0, %eax
-; AVX1-NEXT: vcvtsi2sd %eax, %xmm1, %xmm0
+; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptosi double %x to i32
%r = sitofp i32 %i to double
@@ -281,9 +280,8 @@ define double @trunc_signed32_f64_no_fast_math(double %x) {
define double @trunc_signed32_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed32_f64_nsz:
; SSE2: # %bb.0:
-; SSE2-NEXT: cvttsd2si %xmm0, %eax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2sd %eax, %xmm0
+; SSE2-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed32_f64_nsz:
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
index 9d86b4b81bda..059b39f1a67d 100644
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -7,8 +7,8 @@
define i32 @isint_return(double %d) nounwind {
; CHECK64-LABEL: isint_return:
; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttsd2si %xmm0, %eax
-; CHECK64-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
+; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
; CHECK64-NEXT: movq %xmm1, %rax
; CHECK64-NEXT: andl $1, %eax
@@ -18,8 +18,8 @@ define i32 @isint_return(double %d) nounwind {
; CHECK32-LABEL: isint_return:
; CHECK32: # %bb.0:
; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT: cvttsd2si %xmm0, %eax
-; CHECK32-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
+; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
; CHECK32-NEXT: movd %xmm1, %eax
; CHECK32-NEXT: andl $1, %eax
@@ -62,8 +62,8 @@ declare void @foo()
define void @isint_branch(double %d) nounwind {
; CHECK64-LABEL: isint_branch:
; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttsd2si %xmm0, %eax
-; CHECK64-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
+; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK64-NEXT: ucomisd %xmm1, %xmm0
; CHECK64-NEXT: jne .LBB2_2
; CHECK64-NEXT: jp .LBB2_2
@@ -77,8 +77,8 @@ define void @isint_branch(double %d) nounwind {
; CHECK32-LABEL: isint_branch:
; CHECK32: # %bb.0:
; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT: cvttsd2si %xmm0, %eax
-; CHECK32-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
+; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK32-NEXT: ucomisd %xmm1, %xmm0
; CHECK32-NEXT: jne .LBB2_2
; CHECK32-NEXT: jp .LBB2_2
diff --git a/llvm/test/CodeGen/X86/setoeq.ll b/llvm/test/CodeGen/X86/setoeq.ll
index 89069498c295..f0addf4b6459 100644
--- a/llvm/test/CodeGen/X86/setoeq.ll
+++ b/llvm/test/CodeGen/X86/setoeq.ll
@@ -5,8 +5,8 @@ define zeroext i8 @t(double %x) nounwind readnone {
; CHECK-LABEL: t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvttsd2si %xmm0, %eax
-; CHECK-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
+; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK-NEXT: cmpeqsd %xmm0, %xmm1
; CHECK-NEXT: movd %xmm1, %eax
; CHECK-NEXT: andl $1, %eax
@@ -24,8 +24,8 @@ define zeroext i8 @u(double %x) nounwind readnone {
; CHECK-LABEL: u:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvttsd2si %xmm0, %eax
-; CHECK-NEXT: cvtsi2sd %eax, %xmm1
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
+; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK-NEXT: cmpneqsd %xmm0, %xmm1
; CHECK-NEXT: movd %xmm1, %eax
; CHECK-NEXT: andl $1, %eax
More information about the llvm-commits
mailing list