[llvm] r332498 - [X86][AVX512DQ] Use packed instructions for scalar FP<->i64 conversions on 32-bit targets
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed May 16 10:40:07 PDT 2018
Author: ctopper
Date: Wed May 16 10:40:07 2018
New Revision: 332498
URL: http://llvm.org/viewvc/llvm-project?rev=332498&view=rev
Log:
[X86][AVX512DQ] Use packed instructions for scalar FP<->i64 conversions on 32-bit targets
As i64 is not a legal type on 32-bit targets, insert the scalar value into a suitable zero vector and use the packed vXi64<->FP conversion instructions instead.
Fixes PR3163.
Differential Revision: https://reviews.llvm.org/D43441
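For context, a minimal IR sketch of the scalar integer-to-FP conversions this change targets (the same functions appear in the scalar-int-to-fp.ll test updated below). On a 32-bit target with AVX512DQ these should now select packed vcvtqq2ps/vcvtuqq2pd-style instructions rather than an x87 fildll sequence:

define float @s64_to_f(i64 %a) nounwind {
  %r = sitofp i64 %a to float
  ret float %r
}

define double @u64_to_d(i64 %a) nounwind {
  %r = uitofp i64 %a to double
  ret double %r
}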
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll
llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll
llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=332498&r1=332497&r2=332498&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 16 10:40:07 2018
@@ -16035,6 +16035,34 @@ static SDValue LowerShiftParts(SDValue O
return DAG.getMergeValues(Ops, dl);
}
+// Try to use a packed vector operation to handle i64 on 32-bit targets when
+// AVX512DQ is enabled.
+static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!");
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
+
+ if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
+ (VT != MVT::f32 && VT != MVT::f64))
+ return SDValue();
+
+ // Pack the i64 into a vector, do the operation and extract.
+
+ // Using 256-bit to ensure result is 128-bits for f32 case.
+ unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
+ MVT VecInVT = MVT::getVectorVT(MVT::i64, NumElts);
+ MVT VecVT = MVT::getVectorVT(VT, NumElts);
+
+ SDLoc dl(Op);
+ SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+}
+
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
@@ -16056,15 +16084,17 @@ SDValue X86TargetLowering::LowerSINT_TO_
// These are really Legal; return the operand so the caller accepts it as
// Legal.
- if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(VT))
return Op;
- if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
- Subtarget.is64Bit()) {
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit()) {
return Op;
}
+ if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
+ return V;
+
SDValue ValueToStore = Op.getOperand(0);
- if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) &&
!Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
@@ -16415,6 +16445,9 @@ SDValue X86TargetLowering::LowerUINT_TO_
return Op;
}
+ if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
+ return V;
+
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
@@ -25191,12 +25224,14 @@ void X86TargetLowering::ReplaceNodeResul
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
- if (N->getValueType(0) == MVT::v2i32) {
+ if (VT == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- SDValue Src = N->getOperand(0);
if (Src.getValueType() == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
@@ -25217,7 +25252,7 @@ void X86TargetLowering::ReplaceNodeResul
Results.push_back(Res);
return;
}
- if (Src.getValueType() == MVT::v2f32) {
+ if (SrcVT == MVT::v2f32) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
@@ -25234,11 +25269,30 @@ void X86TargetLowering::ReplaceNodeResul
return;
}
+ if (Subtarget.hasDQI() && VT == MVT::i64 &&
+ (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
+ assert(!Subtarget.is64Bit() && "i64 should be legal");
+ unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
+ // Using a 256-bit input here to guarantee 128-bit input for f32 case.
+ // TODO: Use 128-bit vectors for f64 case?
+ // TODO: Use 128-bit vectors for f32 by using CVTTP2SI/CVTTP2UI.
+ MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts);
+ MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts);
+
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
+ SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
+ DAG.getConstantFP(0.0, dl, VecInVT), Src,
+ ZeroIdx);
+ Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx);
+ Results.push_back(Res);
+ return;
+ }
+
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode()) {
- EVT VT = N->getValueType(0);
// Return a load from the stack slot.
if (StackSlot.getNode())
Results.push_back(
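For the FP-to-i64 direction handled in ReplaceNodeResults above, a corresponding minimal IR sketch (the same functions are exercised by scalar-fp-to-i64.ll below); on 32-bit AVX512DQ targets these should now lower to packed vcvttps2qq/vcvttpd2uqq-style conversions instead of going through memory with fisttpll:

define i64 @f_to_s64(float %a) nounwind {
  %r = fptosi float %a to i64
  ret i64 %r
}

define i64 @d_to_u64(double %a) nounwind {
  %r = fptoui double %a to i64
  ret i64 %r
}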
Modified: llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll?rev=332498&r1=332497&r2=332498&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll Wed May 16 10:40:07 2018
@@ -1166,30 +1166,25 @@ define x86_regcallcc <32 x float> @testf
define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 {
; X32-LABEL: test_argRetMixTypes:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $16, %esp
-; X32-NEXT: vmovd %edx, %xmm2
-; X32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2
-; X32-NEXT: movl 8(%ebp), %edx
+; X32-NEXT: pushl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; X32-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; X32-NEXT: vcvtsi2sdl %eax, %xmm3, %xmm1
+; X32-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; X32-NEXT: vcvtsi2sdl %ecx, %xmm3, %xmm1
+; X32-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; X32-NEXT: vmovq %xmm2, {{[0-9]+}}(%esp)
-; X32-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-NEXT: fstpl (%esp)
-; X32-NEXT: vaddsd (%esp), %xmm0, %xmm0
-; X32-NEXT: vcvtsi2sdl %esi, %xmm3, %xmm1
+; X32-NEXT: vmovd %edx, %xmm1
+; X32-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
+; X32-NEXT: vcvtqq2pd %ymm1, %ymm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; X32-NEXT: vcvtsi2sdl (%edx), %xmm3, %xmm1
+; X32-NEXT: vcvtsi2sdl %esi, %xmm2, %xmm1
+; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; X32-NEXT: vcvtsi2sdl (%ebx), %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvttsd2si %xmm0, %eax
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
+; X32-NEXT: popl %ebx
+; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetMixTypes:
Modified: llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll?rev=332498&r1=332497&r2=332498&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll Wed May 16 10:40:07 2018
@@ -39,55 +39,91 @@
; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only).
define i64 @f_to_u64(float %a) nounwind {
-; AVX512_32_WIN-LABEL: f_to_u64:
-; AVX512_32_WIN: # %bb.0:
-; AVX512_32_WIN-NEXT: pushl %ebp
-; AVX512_32_WIN-NEXT: movl %esp, %ebp
-; AVX512_32_WIN-NEXT: andl $-8, %esp
-; AVX512_32_WIN-NEXT: subl $16, %esp
-; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1
-; AVX512_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2
-; AVX512_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
-; AVX512_32_WIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
-; AVX512_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
-; AVX512_32_WIN-NEXT: fisttpll (%esp)
-; AVX512_32_WIN-NEXT: xorl %edx, %edx
-; AVX512_32_WIN-NEXT: vucomiss %xmm0, %xmm1
-; AVX512_32_WIN-NEXT: setbe %dl
-; AVX512_32_WIN-NEXT: shll $31, %edx
-; AVX512_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; AVX512_32_WIN-NEXT: movl (%esp), %eax
-; AVX512_32_WIN-NEXT: movl %ebp, %esp
-; AVX512_32_WIN-NEXT: popl %ebp
-; AVX512_32_WIN-NEXT: retl
-;
-; AVX512_32_LIN-LABEL: f_to_u64:
-; AVX512_32_LIN: # %bb.0:
-; AVX512_32_LIN-NEXT: subl $20, %esp
-; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1
-; AVX512_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2
-; AVX512_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
-; AVX512_32_LIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
-; AVX512_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
-; AVX512_32_LIN-NEXT: fisttpll (%esp)
-; AVX512_32_LIN-NEXT: xorl %edx, %edx
-; AVX512_32_LIN-NEXT: vucomiss %xmm0, %xmm1
-; AVX512_32_LIN-NEXT: setbe %dl
-; AVX512_32_LIN-NEXT: shll $31, %edx
-; AVX512_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; AVX512_32_LIN-NEXT: movl (%esp), %eax
-; AVX512_32_LIN-NEXT: addl $20, %esp
-; AVX512_32_LIN-NEXT: retl
+; AVX512DQVL_32_WIN-LABEL: f_to_u64:
+; AVX512DQVL_32_WIN: # %bb.0:
+; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQVL_32_WIN-NEXT: vzeroupper
+; AVX512DQVL_32_WIN-NEXT: retl
+;
+; AVX512DQVL_32_LIN-LABEL: f_to_u64:
+; AVX512DQVL_32_LIN: # %bb.0:
+; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQVL_32_LIN-NEXT: vzeroupper
+; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: f_to_u64:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512_64-NEXT: retq
;
+; AVX512DQ_32_WIN-LABEL: f_to_u64:
+; AVX512DQ_32_WIN: # %bb.0:
+; AVX512DQ_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512DQ_32_WIN-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQ_32_WIN-NEXT: vzeroupper
+; AVX512DQ_32_WIN-NEXT: retl
+;
+; AVX512DQ_32_LIN-LABEL: f_to_u64:
+; AVX512DQ_32_LIN: # %bb.0:
+; AVX512DQ_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512DQ_32_LIN-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQ_32_LIN-NEXT: vzeroupper
+; AVX512DQ_32_LIN-NEXT: retl
+;
+; AVX512F_32_WIN-LABEL: f_to_u64:
+; AVX512F_32_WIN: # %bb.0:
+; AVX512F_32_WIN-NEXT: pushl %ebp
+; AVX512F_32_WIN-NEXT: movl %esp, %ebp
+; AVX512F_32_WIN-NEXT: andl $-8, %esp
+; AVX512F_32_WIN-NEXT: subl $16, %esp
+; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1
+; AVX512F_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2
+; AVX512F_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
+; AVX512F_32_WIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: fisttpll (%esp)
+; AVX512F_32_WIN-NEXT: xorl %edx, %edx
+; AVX512F_32_WIN-NEXT: vucomiss %xmm0, %xmm1
+; AVX512F_32_WIN-NEXT: setbe %dl
+; AVX512F_32_WIN-NEXT: shll $31, %edx
+; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; AVX512F_32_WIN-NEXT: movl (%esp), %eax
+; AVX512F_32_WIN-NEXT: movl %ebp, %esp
+; AVX512F_32_WIN-NEXT: popl %ebp
+; AVX512F_32_WIN-NEXT: retl
+;
+; AVX512F_32_LIN-LABEL: f_to_u64:
+; AVX512F_32_LIN: # %bb.0:
+; AVX512F_32_LIN-NEXT: subl $20, %esp
+; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1
+; AVX512F_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2
+; AVX512F_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
+; AVX512F_32_LIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: fisttpll (%esp)
+; AVX512F_32_LIN-NEXT: xorl %edx, %edx
+; AVX512F_32_LIN-NEXT: vucomiss %xmm0, %xmm1
+; AVX512F_32_LIN-NEXT: setbe %dl
+; AVX512F_32_LIN-NEXT: shll $31, %edx
+; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; AVX512F_32_LIN-NEXT: movl (%esp), %eax
+; AVX512F_32_LIN-NEXT: addl $20, %esp
+; AVX512F_32_LIN-NEXT: retl
+;
; SSE3_32_WIN-LABEL: f_to_u64:
; SSE3_32_WIN: # %bb.0:
; SSE3_32_WIN-NEXT: pushl %ebp
@@ -304,39 +340,75 @@ define i64 @f_to_u64(float %a) nounwind
}
define i64 @f_to_s64(float %a) nounwind {
-; AVX512_32_WIN-LABEL: f_to_s64:
-; AVX512_32_WIN: # %bb.0:
-; AVX512_32_WIN-NEXT: pushl %ebp
-; AVX512_32_WIN-NEXT: movl %esp, %ebp
-; AVX512_32_WIN-NEXT: andl $-8, %esp
-; AVX512_32_WIN-NEXT: subl $16, %esp
-; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512_32_WIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
-; AVX512_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
-; AVX512_32_WIN-NEXT: fisttpll (%esp)
-; AVX512_32_WIN-NEXT: movl (%esp), %eax
-; AVX512_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512_32_WIN-NEXT: movl %ebp, %esp
-; AVX512_32_WIN-NEXT: popl %ebp
-; AVX512_32_WIN-NEXT: retl
-;
-; AVX512_32_LIN-LABEL: f_to_s64:
-; AVX512_32_LIN: # %bb.0:
-; AVX512_32_LIN-NEXT: subl $20, %esp
-; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512_32_LIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
-; AVX512_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
-; AVX512_32_LIN-NEXT: fisttpll (%esp)
-; AVX512_32_LIN-NEXT: movl (%esp), %eax
-; AVX512_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512_32_LIN-NEXT: addl $20, %esp
-; AVX512_32_LIN-NEXT: retl
+; AVX512DQVL_32_WIN-LABEL: f_to_s64:
+; AVX512DQVL_32_WIN: # %bb.0:
+; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQVL_32_WIN-NEXT: vzeroupper
+; AVX512DQVL_32_WIN-NEXT: retl
+;
+; AVX512DQVL_32_LIN-LABEL: f_to_s64:
+; AVX512DQVL_32_LIN: # %bb.0:
+; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQVL_32_LIN-NEXT: vzeroupper
+; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: f_to_s64:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvttss2si %xmm0, %rax
; AVX512_64-NEXT: retq
;
+; AVX512DQ_32_WIN-LABEL: f_to_s64:
+; AVX512DQ_32_WIN: # %bb.0:
+; AVX512DQ_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512DQ_32_WIN-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQ_32_WIN-NEXT: vzeroupper
+; AVX512DQ_32_WIN-NEXT: retl
+;
+; AVX512DQ_32_LIN-LABEL: f_to_s64:
+; AVX512DQ_32_LIN: # %bb.0:
+; AVX512DQ_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512DQ_32_LIN-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQ_32_LIN-NEXT: vzeroupper
+; AVX512DQ_32_LIN-NEXT: retl
+;
+; AVX512F_32_WIN-LABEL: f_to_s64:
+; AVX512F_32_WIN: # %bb.0:
+; AVX512F_32_WIN-NEXT: pushl %ebp
+; AVX512F_32_WIN-NEXT: movl %esp, %ebp
+; AVX512F_32_WIN-NEXT: andl $-8, %esp
+; AVX512F_32_WIN-NEXT: subl $16, %esp
+; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F_32_WIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: fisttpll (%esp)
+; AVX512F_32_WIN-NEXT: movl (%esp), %eax
+; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F_32_WIN-NEXT: movl %ebp, %esp
+; AVX512F_32_WIN-NEXT: popl %ebp
+; AVX512F_32_WIN-NEXT: retl
+;
+; AVX512F_32_LIN-LABEL: f_to_s64:
+; AVX512F_32_LIN: # %bb.0:
+; AVX512F_32_LIN-NEXT: subl $20, %esp
+; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F_32_LIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: fisttpll (%esp)
+; AVX512F_32_LIN-NEXT: movl (%esp), %eax
+; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F_32_LIN-NEXT: addl $20, %esp
+; AVX512F_32_LIN-NEXT: retl
+;
; SSE3_32_WIN-LABEL: f_to_s64:
; SSE3_32_WIN: # %bb.0:
; SSE3_32_WIN-NEXT: pushl %ebp
@@ -455,55 +527,91 @@ define i64 @f_to_s64(float %a) nounwind
}
define i64 @d_to_u64(double %a) nounwind {
-; AVX512_32_WIN-LABEL: d_to_u64:
-; AVX512_32_WIN: # %bb.0:
-; AVX512_32_WIN-NEXT: pushl %ebp
-; AVX512_32_WIN-NEXT: movl %esp, %ebp
-; AVX512_32_WIN-NEXT: andl $-8, %esp
-; AVX512_32_WIN-NEXT: subl $16, %esp
-; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1
-; AVX512_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2
-; AVX512_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
-; AVX512_32_WIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
-; AVX512_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
-; AVX512_32_WIN-NEXT: fisttpll (%esp)
-; AVX512_32_WIN-NEXT: xorl %edx, %edx
-; AVX512_32_WIN-NEXT: vucomisd %xmm0, %xmm1
-; AVX512_32_WIN-NEXT: setbe %dl
-; AVX512_32_WIN-NEXT: shll $31, %edx
-; AVX512_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; AVX512_32_WIN-NEXT: movl (%esp), %eax
-; AVX512_32_WIN-NEXT: movl %ebp, %esp
-; AVX512_32_WIN-NEXT: popl %ebp
-; AVX512_32_WIN-NEXT: retl
-;
-; AVX512_32_LIN-LABEL: d_to_u64:
-; AVX512_32_LIN: # %bb.0:
-; AVX512_32_LIN-NEXT: subl $20, %esp
-; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1
-; AVX512_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2
-; AVX512_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
-; AVX512_32_LIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
-; AVX512_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
-; AVX512_32_LIN-NEXT: fisttpll (%esp)
-; AVX512_32_LIN-NEXT: xorl %edx, %edx
-; AVX512_32_LIN-NEXT: vucomisd %xmm0, %xmm1
-; AVX512_32_LIN-NEXT: setbe %dl
-; AVX512_32_LIN-NEXT: shll $31, %edx
-; AVX512_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; AVX512_32_LIN-NEXT: movl (%esp), %eax
-; AVX512_32_LIN-NEXT: addl $20, %esp
-; AVX512_32_LIN-NEXT: retl
+; AVX512DQVL_32_WIN-LABEL: d_to_u64:
+; AVX512DQVL_32_WIN: # %bb.0:
+; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQVL_32_WIN-NEXT: vzeroupper
+; AVX512DQVL_32_WIN-NEXT: retl
+;
+; AVX512DQVL_32_LIN-LABEL: d_to_u64:
+; AVX512DQVL_32_LIN: # %bb.0:
+; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQVL_32_LIN-NEXT: vzeroupper
+; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: d_to_u64:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512_64-NEXT: retq
;
+; AVX512DQ_32_WIN-LABEL: d_to_u64:
+; AVX512DQ_32_WIN: # %bb.0:
+; AVX512DQ_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ_32_WIN-NEXT: vcvttpd2uqq %zmm0, %zmm0
+; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQ_32_WIN-NEXT: vzeroupper
+; AVX512DQ_32_WIN-NEXT: retl
+;
+; AVX512DQ_32_LIN-LABEL: d_to_u64:
+; AVX512DQ_32_LIN: # %bb.0:
+; AVX512DQ_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ_32_LIN-NEXT: vcvttpd2uqq %zmm0, %zmm0
+; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQ_32_LIN-NEXT: vzeroupper
+; AVX512DQ_32_LIN-NEXT: retl
+;
+; AVX512F_32_WIN-LABEL: d_to_u64:
+; AVX512F_32_WIN: # %bb.0:
+; AVX512F_32_WIN-NEXT: pushl %ebp
+; AVX512F_32_WIN-NEXT: movl %esp, %ebp
+; AVX512F_32_WIN-NEXT: andl $-8, %esp
+; AVX512F_32_WIN-NEXT: subl $16, %esp
+; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX512F_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1
+; AVX512F_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2
+; AVX512F_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
+; AVX512F_32_WIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: fisttpll (%esp)
+; AVX512F_32_WIN-NEXT: xorl %edx, %edx
+; AVX512F_32_WIN-NEXT: vucomisd %xmm0, %xmm1
+; AVX512F_32_WIN-NEXT: setbe %dl
+; AVX512F_32_WIN-NEXT: shll $31, %edx
+; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; AVX512F_32_WIN-NEXT: movl (%esp), %eax
+; AVX512F_32_WIN-NEXT: movl %ebp, %esp
+; AVX512F_32_WIN-NEXT: popl %ebp
+; AVX512F_32_WIN-NEXT: retl
+;
+; AVX512F_32_LIN-LABEL: d_to_u64:
+; AVX512F_32_LIN: # %bb.0:
+; AVX512F_32_LIN-NEXT: subl $20, %esp
+; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX512F_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1
+; AVX512F_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2
+; AVX512F_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
+; AVX512F_32_LIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: fisttpll (%esp)
+; AVX512F_32_LIN-NEXT: xorl %edx, %edx
+; AVX512F_32_LIN-NEXT: vucomisd %xmm0, %xmm1
+; AVX512F_32_LIN-NEXT: setbe %dl
+; AVX512F_32_LIN-NEXT: shll $31, %edx
+; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; AVX512F_32_LIN-NEXT: movl (%esp), %eax
+; AVX512F_32_LIN-NEXT: addl $20, %esp
+; AVX512F_32_LIN-NEXT: retl
+;
; SSE3_32_WIN-LABEL: d_to_u64:
; SSE3_32_WIN: # %bb.0:
; SSE3_32_WIN-NEXT: pushl %ebp
@@ -720,39 +828,75 @@ define i64 @d_to_u64(double %a) nounwind
}
define i64 @d_to_s64(double %a) nounwind {
-; AVX512_32_WIN-LABEL: d_to_s64:
-; AVX512_32_WIN: # %bb.0:
-; AVX512_32_WIN-NEXT: pushl %ebp
-; AVX512_32_WIN-NEXT: movl %esp, %ebp
-; AVX512_32_WIN-NEXT: andl $-8, %esp
-; AVX512_32_WIN-NEXT: subl $16, %esp
-; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32_WIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
-; AVX512_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
-; AVX512_32_WIN-NEXT: fisttpll (%esp)
-; AVX512_32_WIN-NEXT: movl (%esp), %eax
-; AVX512_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512_32_WIN-NEXT: movl %ebp, %esp
-; AVX512_32_WIN-NEXT: popl %ebp
-; AVX512_32_WIN-NEXT: retl
-;
-; AVX512_32_LIN-LABEL: d_to_s64:
-; AVX512_32_LIN: # %bb.0:
-; AVX512_32_LIN-NEXT: subl $20, %esp
-; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32_LIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
-; AVX512_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
-; AVX512_32_LIN-NEXT: fisttpll (%esp)
-; AVX512_32_LIN-NEXT: movl (%esp), %eax
-; AVX512_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512_32_LIN-NEXT: addl $20, %esp
-; AVX512_32_LIN-NEXT: retl
+; AVX512DQVL_32_WIN-LABEL: d_to_s64:
+; AVX512DQVL_32_WIN: # %bb.0:
+; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %ymm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQVL_32_WIN-NEXT: vzeroupper
+; AVX512DQVL_32_WIN-NEXT: retl
+;
+; AVX512DQVL_32_LIN-LABEL: d_to_s64:
+; AVX512DQVL_32_LIN: # %bb.0:
+; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %ymm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQVL_32_LIN-NEXT: vzeroupper
+; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: d_to_s64:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512_64-NEXT: retq
;
+; AVX512DQ_32_WIN-LABEL: d_to_s64:
+; AVX512DQ_32_WIN: # %bb.0:
+; AVX512DQ_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ_32_WIN-NEXT: vcvttpd2qq %zmm0, %zmm0
+; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQ_32_WIN-NEXT: vzeroupper
+; AVX512DQ_32_WIN-NEXT: retl
+;
+; AVX512DQ_32_LIN-LABEL: d_to_s64:
+; AVX512DQ_32_LIN: # %bb.0:
+; AVX512DQ_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ_32_LIN-NEXT: vcvttpd2qq %zmm0, %zmm0
+; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax
+; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
+; AVX512DQ_32_LIN-NEXT: vzeroupper
+; AVX512DQ_32_LIN-NEXT: retl
+;
+; AVX512F_32_WIN-LABEL: d_to_s64:
+; AVX512F_32_WIN: # %bb.0:
+; AVX512F_32_WIN-NEXT: pushl %ebp
+; AVX512F_32_WIN-NEXT: movl %esp, %ebp
+; AVX512F_32_WIN-NEXT: andl $-8, %esp
+; AVX512F_32_WIN-NEXT: subl $16, %esp
+; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512F_32_WIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F_32_WIN-NEXT: fisttpll (%esp)
+; AVX512F_32_WIN-NEXT: movl (%esp), %eax
+; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F_32_WIN-NEXT: movl %ebp, %esp
+; AVX512F_32_WIN-NEXT: popl %ebp
+; AVX512F_32_WIN-NEXT: retl
+;
+; AVX512F_32_LIN-LABEL: d_to_s64:
+; AVX512F_32_LIN: # %bb.0:
+; AVX512F_32_LIN-NEXT: subl $20, %esp
+; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512F_32_LIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F_32_LIN-NEXT: fisttpll (%esp)
+; AVX512F_32_LIN-NEXT: movl (%esp), %eax
+; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F_32_LIN-NEXT: addl $20, %esp
+; AVX512F_32_LIN-NEXT: retl
+;
; SSE3_32_WIN-LABEL: d_to_s64:
; SSE3_32_WIN: # %bb.0:
; SSE3_32_WIN-NEXT: pushl %ebp
Modified: llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll?rev=332498&r1=332497&r2=332498&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll Wed May 16 10:40:07 2018
@@ -300,32 +300,54 @@ define x86_fp80 @s32_to_x(i32 %a) nounwi
}
define float @u64_to_f(i64 %a) nounwind {
-; AVX512_32-LABEL: u64_to_f:
-; AVX512_32: # %bb.0:
-; AVX512_32-NEXT: pushl %ebp
-; AVX512_32-NEXT: movl %esp, %ebp
-; AVX512_32-NEXT: andl $-8, %esp
-; AVX512_32-NEXT: subl $16, %esp
-; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: xorl %eax, %eax
-; AVX512_32-NEXT: cmpl $0, 12(%ebp)
-; AVX512_32-NEXT: setns %al
-; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
-; AVX512_32-NEXT: fstps {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512_32-NEXT: vmovss %xmm0, (%esp)
-; AVX512_32-NEXT: flds (%esp)
-; AVX512_32-NEXT: movl %ebp, %esp
-; AVX512_32-NEXT: popl %ebp
-; AVX512_32-NEXT: retl
+; AVX512DQVL_32-LABEL: u64_to_f:
+; AVX512DQVL_32: # %bb.0:
+; AVX512DQVL_32-NEXT: pushl %eax
+; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQVL_32-NEXT: vcvtuqq2ps %ymm0, %xmm0
+; AVX512DQVL_32-NEXT: vmovss %xmm0, (%esp)
+; AVX512DQVL_32-NEXT: flds (%esp)
+; AVX512DQVL_32-NEXT: popl %eax
+; AVX512DQVL_32-NEXT: vzeroupper
+; AVX512DQVL_32-NEXT: retl
;
; AVX512_64-LABEL: u64_to_f:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: retq
;
+; AVX512DQ_32-LABEL: u64_to_f:
+; AVX512DQ_32: # %bb.0:
+; AVX512DQ_32-NEXT: pushl %eax
+; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ_32-NEXT: vcvtuqq2ps %zmm0, %ymm0
+; AVX512DQ_32-NEXT: vmovss %xmm0, (%esp)
+; AVX512DQ_32-NEXT: flds (%esp)
+; AVX512DQ_32-NEXT: popl %eax
+; AVX512DQ_32-NEXT: vzeroupper
+; AVX512DQ_32-NEXT: retl
+;
+; AVX512F_32-LABEL: u64_to_f:
+; AVX512F_32: # %bb.0:
+; AVX512F_32-NEXT: pushl %ebp
+; AVX512F_32-NEXT: movl %esp, %ebp
+; AVX512F_32-NEXT: andl $-8, %esp
+; AVX512F_32-NEXT: subl $16, %esp
+; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512F_32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: xorl %eax, %eax
+; AVX512F_32-NEXT: cmpl $0, 12(%ebp)
+; AVX512F_32-NEXT: setns %al
+; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
+; AVX512F_32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F_32-NEXT: vmovss %xmm0, (%esp)
+; AVX512F_32-NEXT: flds (%esp)
+; AVX512F_32-NEXT: movl %ebp, %esp
+; AVX512F_32-NEXT: popl %ebp
+; AVX512F_32-NEXT: retl
+;
; SSE2_32-LABEL: u64_to_f:
; SSE2_32: # %bb.0:
; SSE2_32-NEXT: pushl %ebp
@@ -425,25 +447,21 @@ define float @s64_to_f(i64 %a) nounwind
}
define float @s64_to_f_2(i64 %a) nounwind {
-; AVX512_32-LABEL: s64_to_f_2:
-; AVX512_32: # %bb.0:
-; AVX512_32-NEXT: pushl %ebp
-; AVX512_32-NEXT: movl %esp, %ebp
-; AVX512_32-NEXT: andl $-8, %esp
-; AVX512_32-NEXT: subl $16, %esp
-; AVX512_32-NEXT: movl 8(%ebp), %eax
-; AVX512_32-NEXT: movl 12(%ebp), %ecx
-; AVX512_32-NEXT: addl $5, %eax
-; AVX512_32-NEXT: adcl $0, %ecx
-; AVX512_32-NEXT: vmovd %eax, %xmm0
-; AVX512_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX512_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: fstps {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: flds {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: movl %ebp, %esp
-; AVX512_32-NEXT: popl %ebp
-; AVX512_32-NEXT: retl
+; AVX512DQVL_32-LABEL: s64_to_f_2:
+; AVX512DQVL_32: # %bb.0:
+; AVX512DQVL_32-NEXT: pushl %eax
+; AVX512DQVL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512DQVL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX512DQVL_32-NEXT: addl $5, %eax
+; AVX512DQVL_32-NEXT: adcl $0, %ecx
+; AVX512DQVL_32-NEXT: vmovd %eax, %xmm0
+; AVX512DQVL_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512DQVL_32-NEXT: vcvtqq2ps %ymm0, %xmm0
+; AVX512DQVL_32-NEXT: vmovss %xmm0, (%esp)
+; AVX512DQVL_32-NEXT: flds (%esp)
+; AVX512DQVL_32-NEXT: popl %eax
+; AVX512DQVL_32-NEXT: vzeroupper
+; AVX512DQVL_32-NEXT: retl
;
; AVX512_64-LABEL: s64_to_f_2:
; AVX512_64: # %bb.0:
@@ -451,6 +469,42 @@ define float @s64_to_f_2(i64 %a) nounwin
; AVX512_64-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: retq
;
+; AVX512DQ_32-LABEL: s64_to_f_2:
+; AVX512DQ_32: # %bb.0:
+; AVX512DQ_32-NEXT: pushl %eax
+; AVX512DQ_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512DQ_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX512DQ_32-NEXT: addl $5, %eax
+; AVX512DQ_32-NEXT: adcl $0, %ecx
+; AVX512DQ_32-NEXT: vmovd %eax, %xmm0
+; AVX512DQ_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512DQ_32-NEXT: vcvtqq2ps %zmm0, %ymm0
+; AVX512DQ_32-NEXT: vmovss %xmm0, (%esp)
+; AVX512DQ_32-NEXT: flds (%esp)
+; AVX512DQ_32-NEXT: popl %eax
+; AVX512DQ_32-NEXT: vzeroupper
+; AVX512DQ_32-NEXT: retl
+;
+; AVX512F_32-LABEL: s64_to_f_2:
+; AVX512F_32: # %bb.0:
+; AVX512F_32-NEXT: pushl %ebp
+; AVX512F_32-NEXT: movl %esp, %ebp
+; AVX512F_32-NEXT: andl $-8, %esp
+; AVX512F_32-NEXT: subl $16, %esp
+; AVX512F_32-NEXT: movl 8(%ebp), %eax
+; AVX512F_32-NEXT: movl 12(%ebp), %ecx
+; AVX512F_32-NEXT: addl $5, %eax
+; AVX512F_32-NEXT: adcl $0, %ecx
+; AVX512F_32-NEXT: vmovd %eax, %xmm0
+; AVX512F_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512F_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: movl %ebp, %esp
+; AVX512F_32-NEXT: popl %ebp
+; AVX512F_32-NEXT: retl
+;
; SSE2_32-LABEL: s64_to_f_2:
; SSE2_32: # %bb.0:
; SSE2_32-NEXT: pushl %ebp
@@ -500,27 +554,57 @@ define float @s64_to_f_2(i64 %a) nounwin
}
define double @u64_to_d(i64 %a) nounwind {
-; AVX512_32-LABEL: u64_to_d:
-; AVX512_32: # %bb.0:
-; AVX512_32-NEXT: pushl %ebp
-; AVX512_32-NEXT: movl %esp, %ebp
-; AVX512_32-NEXT: andl $-8, %esp
-; AVX512_32-NEXT: subl $8, %esp
-; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; AVX512_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
-; AVX512_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX512_32-NEXT: vmovlpd %xmm0, (%esp)
-; AVX512_32-NEXT: fldl (%esp)
-; AVX512_32-NEXT: movl %ebp, %esp
-; AVX512_32-NEXT: popl %ebp
-; AVX512_32-NEXT: retl
+; AVX512DQVL_32-LABEL: u64_to_d:
+; AVX512DQVL_32: # %bb.0:
+; AVX512DQVL_32-NEXT: pushl %ebp
+; AVX512DQVL_32-NEXT: movl %esp, %ebp
+; AVX512DQVL_32-NEXT: andl $-8, %esp
+; AVX512DQVL_32-NEXT: subl $8, %esp
+; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQVL_32-NEXT: vcvtuqq2pd %ymm0, %ymm0
+; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp)
+; AVX512DQVL_32-NEXT: fldl (%esp)
+; AVX512DQVL_32-NEXT: movl %ebp, %esp
+; AVX512DQVL_32-NEXT: popl %ebp
+; AVX512DQVL_32-NEXT: vzeroupper
+; AVX512DQVL_32-NEXT: retl
;
; AVX512_64-LABEL: u64_to_d:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: retq
;
+; AVX512DQ_32-LABEL: u64_to_d:
+; AVX512DQ_32: # %bb.0:
+; AVX512DQ_32-NEXT: pushl %ebp
+; AVX512DQ_32-NEXT: movl %esp, %ebp
+; AVX512DQ_32-NEXT: andl $-8, %esp
+; AVX512DQ_32-NEXT: subl $8, %esp
+; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ_32-NEXT: vcvtuqq2pd %zmm0, %zmm0
+; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp)
+; AVX512DQ_32-NEXT: fldl (%esp)
+; AVX512DQ_32-NEXT: movl %ebp, %esp
+; AVX512DQ_32-NEXT: popl %ebp
+; AVX512DQ_32-NEXT: vzeroupper
+; AVX512DQ_32-NEXT: retl
+;
+; AVX512F_32-LABEL: u64_to_d:
+; AVX512F_32: # %bb.0:
+; AVX512F_32-NEXT: pushl %ebp
+; AVX512F_32-NEXT: movl %esp, %ebp
+; AVX512F_32-NEXT: andl $-8, %esp
+; AVX512F_32-NEXT: subl $8, %esp
+; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512F_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
+; AVX512F_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX512F_32-NEXT: vmovlpd %xmm0, (%esp)
+; AVX512F_32-NEXT: fldl (%esp)
+; AVX512F_32-NEXT: movl %ebp, %esp
+; AVX512F_32-NEXT: popl %ebp
+; AVX512F_32-NEXT: retl
+;
; SSE2_32-LABEL: u64_to_d:
; SSE2_32: # %bb.0:
; SSE2_32-NEXT: pushl %ebp
@@ -617,25 +701,25 @@ define double @s64_to_d(i64 %a) nounwind
}
define double @s64_to_d_2(i64 %a) nounwind {
-; AVX512_32-LABEL: s64_to_d_2:
-; AVX512_32: # %bb.0:
-; AVX512_32-NEXT: pushl %ebp
-; AVX512_32-NEXT: movl %esp, %ebp
-; AVX512_32-NEXT: andl $-8, %esp
-; AVX512_32-NEXT: subl $16, %esp
-; AVX512_32-NEXT: movl 8(%ebp), %eax
-; AVX512_32-NEXT: movl 12(%ebp), %ecx
-; AVX512_32-NEXT: addl $5, %eax
-; AVX512_32-NEXT: adcl $0, %ecx
-; AVX512_32-NEXT: vmovd %eax, %xmm0
-; AVX512_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX512_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp)
-; AVX512_32-NEXT: fstpl (%esp)
-; AVX512_32-NEXT: fldl (%esp)
-; AVX512_32-NEXT: movl %ebp, %esp
-; AVX512_32-NEXT: popl %ebp
-; AVX512_32-NEXT: retl
+; AVX512DQVL_32-LABEL: s64_to_d_2:
+; AVX512DQVL_32: # %bb.0:
+; AVX512DQVL_32-NEXT: pushl %ebp
+; AVX512DQVL_32-NEXT: movl %esp, %ebp
+; AVX512DQVL_32-NEXT: andl $-8, %esp
+; AVX512DQVL_32-NEXT: subl $8, %esp
+; AVX512DQVL_32-NEXT: movl 8(%ebp), %eax
+; AVX512DQVL_32-NEXT: movl 12(%ebp), %ecx
+; AVX512DQVL_32-NEXT: addl $5, %eax
+; AVX512DQVL_32-NEXT: adcl $0, %ecx
+; AVX512DQVL_32-NEXT: vmovd %eax, %xmm0
+; AVX512DQVL_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512DQVL_32-NEXT: vcvtqq2pd %ymm0, %ymm0
+; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp)
+; AVX512DQVL_32-NEXT: fldl (%esp)
+; AVX512DQVL_32-NEXT: movl %ebp, %esp
+; AVX512DQVL_32-NEXT: popl %ebp
+; AVX512DQVL_32-NEXT: vzeroupper
+; AVX512DQVL_32-NEXT: retl
;
; AVX512_64-LABEL: s64_to_d_2:
; AVX512_64: # %bb.0:
@@ -643,6 +727,46 @@ define double @s64_to_d_2(i64 %a) nounwi
; AVX512_64-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: retq
;
+; AVX512DQ_32-LABEL: s64_to_d_2:
+; AVX512DQ_32: # %bb.0:
+; AVX512DQ_32-NEXT: pushl %ebp
+; AVX512DQ_32-NEXT: movl %esp, %ebp
+; AVX512DQ_32-NEXT: andl $-8, %esp
+; AVX512DQ_32-NEXT: subl $8, %esp
+; AVX512DQ_32-NEXT: movl 8(%ebp), %eax
+; AVX512DQ_32-NEXT: movl 12(%ebp), %ecx
+; AVX512DQ_32-NEXT: addl $5, %eax
+; AVX512DQ_32-NEXT: adcl $0, %ecx
+; AVX512DQ_32-NEXT: vmovd %eax, %xmm0
+; AVX512DQ_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512DQ_32-NEXT: vcvtqq2pd %zmm0, %zmm0
+; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp)
+; AVX512DQ_32-NEXT: fldl (%esp)
+; AVX512DQ_32-NEXT: movl %ebp, %esp
+; AVX512DQ_32-NEXT: popl %ebp
+; AVX512DQ_32-NEXT: vzeroupper
+; AVX512DQ_32-NEXT: retl
+;
+; AVX512F_32-LABEL: s64_to_d_2:
+; AVX512F_32: # %bb.0:
+; AVX512F_32-NEXT: pushl %ebp
+; AVX512F_32-NEXT: movl %esp, %ebp
+; AVX512F_32-NEXT: andl $-8, %esp
+; AVX512F_32-NEXT: subl $16, %esp
+; AVX512F_32-NEXT: movl 8(%ebp), %eax
+; AVX512F_32-NEXT: movl 12(%ebp), %ecx
+; AVX512F_32-NEXT: addl $5, %eax
+; AVX512F_32-NEXT: adcl $0, %ecx
+; AVX512F_32-NEXT: vmovd %eax, %xmm0
+; AVX512F_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512F_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX512F_32-NEXT: fstpl (%esp)
+; AVX512F_32-NEXT: fldl (%esp)
+; AVX512F_32-NEXT: movl %ebp, %esp
+; AVX512F_32-NEXT: popl %ebp
+; AVX512F_32-NEXT: retl
+;
; SSE2_32-LABEL: s64_to_d_2:
; SSE2_32: # %bb.0:
; SSE2_32-NEXT: pushl %ebp