[llvm] r239966 - [X86][SSE] Improved support for vector i16 to float conversions.
Simon Pilgrim
llvm-dev at redking.me.uk
Wed Jun 17 15:43:34 PDT 2015
Author: rksimon
Date: Wed Jun 17 17:43:34 2015
New Revision: 239966
URL: http://llvm.org/viewvc/llvm-project?rev=239966&view=rev
Log:
[X86][SSE] Improved support for vector i16 to float conversions.
Added explicit sign extension for v4i16/v8i16 to v4i32/v8i32 before conversion to floats. Matches existing support for v4i8/v8i8.
Follow-up to D10433.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=239966&r1=239965&r2=239966&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jun 17 17:43:34 2015
@@ -24690,18 +24690,19 @@ static SDValue PerformSINT_TO_FPCombine(
const X86Subtarget *Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
- SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
- if (Res != SDValue())
+ if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
// Now move on to more general possibilities.
SDValue Op0 = N->getOperand(0);
EVT InVT = Op0->getValueType(0);
- // SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
- if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
+ // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
+ // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
+ if (InVT == MVT::v8i8 || InVT == MVT::v4i8 ||
+ InVT == MVT::v8i16 || InVT == MVT::v4i16) {
SDLoc dl(N);
- MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
+ MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
}
@@ -24710,7 +24711,7 @@ static SDValue PerformSINT_TO_FPCombine(
// a 32-bit target where SSE doesn't support i64->FP operations.
if (Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
- EVT VT = Ld->getValueType(0);
+ EVT LdVT = Ld->getValueType(0);
// This transformation is not supported if the result type is f16
if (N->getValueType(0) == MVT::f16)
@@ -24718,9 +24719,9 @@ static SDValue PerformSINT_TO_FPCombine(
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
- !Subtarget->is64Bit() && VT == MVT::i64) {
+ !Subtarget->is64Bit() && LdVT == MVT::i64) {
SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
- SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+ SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=239966&r1=239965&r2=239966&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Wed Jun 17 17:43:34 2015
@@ -176,38 +176,16 @@ define <4 x double> @sitofp_4vf64_i32(<4
define <4 x double> @sitofp_4vf64_i16(<8 x i16> %a) {
; SSE2-LABEL: sitofp_4vf64_i16:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: movswq %ax, %rax
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: movswq %ax, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: movswq %ax, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: movswq %ax, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_4vf64_i16:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX-NEXT: vpslld $16, %xmm0, %xmm0
-; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: retq
%shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -632,16 +610,13 @@ define <4 x float> @sitofp_4vf32_i16(<8
; SSE2-LABEL: sitofp_4vf32_i16:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_4vf32_i16:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX-NEXT: vpslld $16, %xmm0, %xmm0
-; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
%shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -728,13 +703,11 @@ define <4 x float> @sitofp_4vf32_4i64(<4
define <8 x float> @sitofp_8vf32_i16(<8 x i16> %a) {
; SSE2-LABEL: sitofp_8vf32_i16:
; SSE2: # BB#0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm2, %xmm0
More information about the llvm-commits mailing list