[llvm] 20aab49 - [X86] Custom widen v2i32->v2f32 strict_sint_to_fp to avoid scalarization.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 27 00:29:17 PST 2019
Author: Craig Topper
Date: 2019-12-27T00:28:44-08:00
New Revision: 20aab49492043acd5a6fe28ad80ddaa667ccfc9e
URL: https://github.com/llvm/llvm-project/commit/20aab49492043acd5a6fe28ad80ddaa667ccfc9e
DIFF: https://github.com/llvm/llvm-project/commit/20aab49492043acd5a6fe28ad80ddaa667ccfc9e.diff
LOG: [X86] Custom widen v2i32->v2f32 strict_sint_to_fp to avoid scalarization.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 530a7908e03f..cfb5e1187cdf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -987,6 +987,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
@@ -1848,8 +1850,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasDQI()) {
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
// v2f32 UINT_TO_FP is already custom under SSE2.
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
"Unexpected operation action!");
@@ -29009,8 +29009,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
- if (SrcVT != MVT::v2i32 || IsSigned)
+ if (SrcVT != MVT::v2i32)
return;
+
+ if (IsSigned) {
+ if (!IsStrict)
+ return;
+
+ // Custom widen strict v2i32->v2f32 to avoid scalarization.
+ // FIXME: Should generic type legalizer do this?
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ DAG.getConstant(0, dl, MVT::v2i32));
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), Src});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
+ }
+
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
SDValue VBias =
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
index daf4bb553677..9cc2c2f9a5b2 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
@@ -38,23 +38,14 @@ declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>
define <2 x float> @sitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-LABEL: sitofp_v2i32_v2f32:
; SSE: # %bb.0:
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: cvtsi2ss %eax, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: xorps %xmm0, %xmm0
-; SSE-NEXT: cvtsi2ss %eax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i32_v2f32:
; AVX: # %bb.0:
-; AVX-NEXT: vextractps $1, %xmm0, %eax
-; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
metadata !"round.dynamic",
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 478594fcdbc5..73371bbd2e23 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -6220,23 +6220,14 @@ entry:
define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: cvtsi2ss %eax, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %eax, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vextractps $1, %xmm0, %eax
-; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call <2 x float>
More information about the llvm-commits
mailing list