[llvm] [X86] lowerFPToIntToFP - handle UI2FP on AVX512VL targets and i64 types on AVX512DQ targets (PR #162656)
Kavin Gnanapandithan via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 28 16:32:06 PDT 2025
https://github.com/KavinTheG updated https://github.com/llvm/llvm-project/pull/162656
>From a45fcc9195e6a52d430420f4207a3d9d15ed0f18 Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Thu, 9 Oct 2025 08:41:07 -0400
Subject: [PATCH 01/11] Added AVX512 handling for UI2FP in lowerFPToIntToFP
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 66 +++-
llvm/test/CodeGen/X86/isint.ll | 459 +++++++++++++++++++++++-
2 files changed, 508 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2feb76e0eb7b4..9e209405bf99e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19885,7 +19885,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
// TODO: Allow FP_TO_UINT.
SDValue CastToInt = CastToFP.getOperand(0);
MVT VT = CastToFP.getSimpleValueType();
- if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
+ if ((CastToInt.getOpcode() != ISD::FP_TO_SINT && CastToInt.getOpcode() != ISD::FP_TO_UINT) || VT.isVector())
return SDValue();
MVT IntVT = CastToInt.getSimpleValueType();
@@ -19897,22 +19897,68 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
// See if we have 128-bit vector cast instructions for this type of cast.
// We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
- IntVT != MVT::i32)
+ !(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI())))
return SDValue();
unsigned SrcSize = SrcVT.getSizeInBits();
unsigned IntSize = IntVT.getSizeInBits();
unsigned VTSize = VT.getSizeInBits();
- MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
- MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
- MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);
+ unsigned ToIntOpcode, ToFPOpcode;
+ unsigned Width = 128;
+ bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
- // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
- unsigned ToIntOpcode =
- SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
- unsigned ToFPOpcode =
- IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ if (IntVT == MVT::i32) {
+ if (IsUnsigned && !Subtarget.hasVLX())
+ return SDValue(); // Need AVX512VL for unsigned i32
+ if (Subtarget.hasVLX()) {
+ if (IsUnsigned) {
+ ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+ ToFPOpcode = IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
+ } else {
+ ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ }
+ } else {
+ // SSE2
+ ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ }
+ } else {
+ if (Subtarget.hasVLX()) {
+ if (IsUnsigned) {
+ ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+ ToFPOpcode = IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
+ } else {
+ ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ }
+ } else {
+ // Need to extend width for AVX512DQ
+ Width = 512;
+ ToIntOpcode = CastToInt.getOpcode();
+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+ }
+ }
+
+ MVT VecSrcVT;
+ MVT VecIntVT;
+ MVT VecVT;
+ if (IntVT == MVT::i64) {
+ unsigned NumElts = Width / IntSize;
+ VecIntVT = MVT::getVectorVT(IntVT, NumElts);
+
+ // minimum legal size is v4f32
+ unsigned SrcElts = (SrcVT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
+ unsigned VTElts = (VT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
+
+ VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
+ VecVT = MVT::getVectorVT(VT, VTElts);
+ } else {
+ VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize);
+ VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize);
+ VecVT = MVT::getVectorVT(VT, Width / VTSize);
+ }
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
//
// We are not defining the high elements (for example, zero them) because
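A note on the hunk above (reviewer's summary, not part of the patch): the fold rewrites a scalar fp -> int -> fp round trip as packed conversions on a vector register plus an element extract, so the new logic boils down to a small feature table. Below is a minimal standalone C++ model of that table, under the assumption that the per-opcode X86ISD-vs-ISD distinction (driven by SrcSize/IntSize) can be ignored; pickPlan and Plan are hypothetical names, not LLVM API.

#include <cstdio>
#include <optional>

struct Plan {
  unsigned WidthBits;  // vector width used for the packed converts
  bool NeedsWidening;  // true when we must go to zmm and back
};

// IntBits is the scalar integer width (32 or 64); HasVLX/HasDQI model the
// subtarget features the patch checks.
std::optional<Plan> pickPlan(unsigned IntBits, bool IsUnsigned, bool HasVLX,
                             bool HasDQI) {
  if (IntBits == 64 && !HasDQI)
    return std::nullopt; // packed i64 converts need AVX512DQ
  if (IntBits == 32 && IsUnsigned && !HasVLX)
    return std::nullopt; // packed unsigned i32 converts need AVX512VL
  if (IntBits == 64 && !HasVLX)
    return Plan{512, true}; // DQ without VL: widen to 512 bits, generic nodes
  return Plan{128, false};  // stay at 128 bits (SSE2 or VLX forms)
}

int main() {
  if (auto P = pickPlan(64, /*IsUnsigned=*/true, /*HasVLX=*/false,
                        /*HasDQI=*/true))
    std::printf("width=%u widen=%d\n", P->WidthBits, (int)P->NeedsWidening);
}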
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
index 8a56f49a6c755..d0b340ce37875 100644
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s
; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s
-
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f | FileCheck -check-prefix=AVX512-NODQ %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq | FileCheck -check-prefix=AVX512-NODQ %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck -check-prefix=AVX512VL %s
; PR19059
define i32 @isint_return(double %d) nounwind {
@@ -24,6 +26,22 @@ define i32 @isint_return(double %d) nounwind {
; CHECK32-NEXT: movd %xmm1, %eax
; CHECK32-NEXT: andl $1, %eax
; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isint_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isint_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
%i = fptosi double %d to i32
%e = sitofp i32 %i to double
%c = fcmp oeq double %d, %e
@@ -50,6 +68,221 @@ define i32 @isint_float_return(float %f) nounwind {
; CHECK32-NEXT: movd %xmm1, %eax
; CHECK32-NEXT: andl $1, %eax
; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isint_float_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isint_float_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptosi float %f to i32
+ %g = sitofp i32 %i to float
+ %c = fcmp oeq float %f, %g
+ %z = zext i1 %c to i32
+ ret i32 %z
+}
+
+define i64 @isint64_float_return(float %f) nounwind {
+; CHECK64-LABEL: isint64_float_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttss2si %xmm0, %rax
+; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT: cmpeqss %xmm0, %xmm1
+; CHECK64-NEXT: movd %xmm1, %eax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isint64_float_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %ebp
+; CHECK32-NEXT: movl %esp, %ebp
+; CHECK32-NEXT: andl $-8, %esp
+; CHECK32-NEXT: subl $32, %esp
+; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00
+; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fstps {{[0-9]+}}(%esp)
+; CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT: movd %xmm0, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: movl %ebp, %esp
+; CHECK32-NEXT: popl %ebp
+; CHECK32-NEXT: retl
+;
+; AVX512VL-LABEL: isint64_float_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2qq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtqq2ps %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptosi float %f to i64
+ %g = sitofp i64 %i to float
+ %c = fcmp oeq float %f, %g
+ %z = zext i1 %c to i64
+ ret i64 %z
+}
+
+define i64 @isint64_return(double %d) nounwind {
+; CHECK64-LABEL: isint64_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttsd2si %xmm0, %rax
+; CHECK64-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
+; CHECK64-NEXT: movq %xmm1, %rax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isint64_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %ebp
+; CHECK32-NEXT: movl %esp, %ebp
+; CHECK32-NEXT: andl $-8, %esp
+; CHECK32-NEXT: subl $32, %esp
+; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00
+; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fstpl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: cmpeqsd {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT: movd %xmm0, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: movl %ebp, %esp
+; CHECK32-NEXT: popl %ebp
+; CHECK32-NEXT: retl
+;
+; AVX512VL-LABEL: isint64_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2qq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtqq2pd %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptosi double %d to i64
+ %g = sitofp i64 %i to double
+ %c = fcmp oeq double %d, %g
+ %z = zext i1 %c to i64
+ ret i64 %z
+}
+
+define i32 @isuint_return(double %d) nounwind {
+; CHECK64-LABEL: isuint_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttsd2si %xmm0, %rax
+; CHECK64-NEXT: movl %eax, %eax
+; CHECK64-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
+; CHECK64-NEXT: movq %xmm1, %rax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isuint_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT: cvttsd2si %xmm0, %eax
+; CHECK32-NEXT: movl %eax, %ecx
+; CHECK32-NEXT: sarl $31, %ecx
+; CHECK32-NEXT: movapd %xmm0, %xmm1
+; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT: cvttsd2si %xmm1, %edx
+; CHECK32-NEXT: andl %ecx, %edx
+; CHECK32-NEXT: orl %eax, %edx
+; CHECK32-NEXT: movd %edx, %xmm1
+; CHECK32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
+; CHECK32-NEXT: movd %xmm1, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isuint_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NODQ-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isuint_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512VL-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptoui double %d to i32
+ %e = uitofp i32 %i to double
+ %c = fcmp oeq double %d, %e
+ %z = zext i1 %c to i32
+ ret i32 %z
+}
+
+define i32 @isuint_float_return(float %f) nounwind {
+; CHECK64-LABEL: isuint_float_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttps2dq %xmm0, %xmm1
+; CHECK64-NEXT: cvtdq2ps %xmm1, %xmm1
+; CHECK64-NEXT: cmpeqss %xmm0, %xmm1
+; CHECK64-NEXT: movd %xmm1, %eax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isuint_float_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT: cvttps2dq %xmm0, %xmm1
+; CHECK32-NEXT: cvtdq2ps %xmm1, %xmm1
+; CHECK32-NEXT: cmpeqss %xmm0, %xmm1
+; CHECK32-NEXT: movd %xmm1, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isuint_float_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isuint_float_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
%i = fptosi float %f to i32
%g = sitofp i32 %i to float
%c = fcmp oeq float %f, %g
@@ -57,6 +290,190 @@ define i32 @isint_float_return(float %f) nounwind {
ret i32 %z
}
+define i64 @isuint64_return(double %d) nounwind {
+; CHECK64-LABEL: isuint64_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttsd2si %xmm0, %rax
+; CHECK64-NEXT: movq %rax, %rcx
+; CHECK64-NEXT: sarq $63, %rcx
+; CHECK64-NEXT: movapd %xmm0, %xmm1
+; CHECK64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT: cvttsd2si %xmm1, %rdx
+; CHECK64-NEXT: andq %rcx, %rdx
+; CHECK64-NEXT: orq %rax, %rdx
+; CHECK64-NEXT: movq %rdx, %xmm1
+; CHECK64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK64-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT: movapd %xmm1, %xmm2
+; CHECK64-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; CHECK64-NEXT: addsd %xmm1, %xmm2
+; CHECK64-NEXT: cmpeqsd %xmm0, %xmm2
+; CHECK64-NEXT: movq %xmm2, %rax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isuint64_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %ebp
+; CHECK32-NEXT: movl %esp, %ebp
+; CHECK32-NEXT: andl $-8, %esp
+; CHECK32-NEXT: subl $16, %esp
+; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
+; CHECK32-NEXT: ucomisd %xmm0, %xmm1
+; CHECK32-NEXT: jbe .LBB6_2
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: xorpd %xmm1, %xmm1
+; CHECK32-NEXT: .LBB6_2:
+; CHECK32-NEXT: movapd %xmm0, %xmm2
+; CHECK32-NEXT: subsd %xmm1, %xmm2
+; CHECK32-NEXT: movsd %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: setbe %al
+; CHECK32-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00
+; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzbl %al, %eax
+; CHECK32-NEXT: shll $31, %eax
+; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movd %eax, %xmm1
+; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; CHECK32-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; CHECK32-NEXT: movapd %xmm2, %xmm1
+; CHECK32-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
+; CHECK32-NEXT: addsd %xmm2, %xmm1
+; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
+; CHECK32-NEXT: movd %xmm1, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: movl %ebp, %esp
+; CHECK32-NEXT: popl %ebp
+; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isuint64_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512-NODQ-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isuint64_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512VL-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptoui double %d to i64
+ %e = uitofp i64 %i to double
+ %c = fcmp oeq double %d, %e
+ %z = zext i1 %c to i64
+ ret i64 %z
+}
+
+define i64 @isuint64_float_return(float %f) nounwind {
+; CHECK64-LABEL: isuint64_float_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttss2si %xmm0, %rcx
+; CHECK64-NEXT: movq %rcx, %rdx
+; CHECK64-NEXT: sarq $63, %rdx
+; CHECK64-NEXT: movaps %xmm0, %xmm1
+; CHECK64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT: cvttss2si %xmm1, %rax
+; CHECK64-NEXT: andq %rdx, %rax
+; CHECK64-NEXT: orq %rcx, %rax
+; CHECK64-NEXT: js .LBB7_1
+; CHECK64-NEXT: # %bb.2:
+; CHECK64-NEXT: xorps %xmm1, %xmm1
+; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT: jmp .LBB7_3
+; CHECK64-NEXT: .LBB7_1:
+; CHECK64-NEXT: movq %rax, %rcx
+; CHECK64-NEXT: shrq %rcx
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: orq %rcx, %rax
+; CHECK64-NEXT: xorps %xmm1, %xmm1
+; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT: addss %xmm1, %xmm1
+; CHECK64-NEXT: .LBB7_3:
+; CHECK64-NEXT: cmpeqss %xmm1, %xmm0
+; CHECK64-NEXT: movd %xmm0, %eax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isuint64_float_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %ebp
+; CHECK32-NEXT: movl %esp, %ebp
+; CHECK32-NEXT: andl $-8, %esp
+; CHECK32-NEXT: subl $32, %esp
+; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
+; CHECK32-NEXT: ucomiss %xmm0, %xmm1
+; CHECK32-NEXT: jbe .LBB7_2
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: xorps %xmm1, %xmm1
+; CHECK32-NEXT: .LBB7_2:
+; CHECK32-NEXT: movaps %xmm0, %xmm2
+; CHECK32-NEXT: subss %xmm1, %xmm2
+; CHECK32-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: setbe %al
+; CHECK32-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00
+; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzbl %al, %eax
+; CHECK32-NEXT: shll $31, %eax
+; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movd %eax, %xmm1
+; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK32-NEXT: movq %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: shrl $31, %eax
+; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
+; CHECK32-NEXT: fstps {{[0-9]+}}(%esp)
+; CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT: movd %xmm0, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: movl %ebp, %esp
+; CHECK32-NEXT: popl %ebp
+; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isuint64_float_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isuint64_float_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptoui float %f to i64
+ %g = uitofp i64 %i to float
+ %c = fcmp oeq float %f, %g
+ %z = zext i1 %c to i64
+ ret i64 %z
+}
+
declare void @foo()
define void @isint_branch(double %d) nounwind {
@@ -65,13 +482,13 @@ define void @isint_branch(double %d) nounwind {
; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK64-NEXT: ucomisd %xmm1, %xmm0
-; CHECK64-NEXT: jne .LBB2_2
-; CHECK64-NEXT: jp .LBB2_2
+; CHECK64-NEXT: jne .LBB8_2
+; CHECK64-NEXT: jp .LBB8_2
; CHECK64-NEXT: # %bb.1: # %true
; CHECK64-NEXT: pushq %rax
; CHECK64-NEXT: callq foo@PLT
; CHECK64-NEXT: popq %rax
-; CHECK64-NEXT: .LBB2_2: # %false
+; CHECK64-NEXT: .LBB8_2: # %false
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: isint_branch:
@@ -80,12 +497,40 @@ define void @isint_branch(double %d) nounwind {
; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK32-NEXT: ucomisd %xmm1, %xmm0
-; CHECK32-NEXT: jne .LBB2_2
-; CHECK32-NEXT: jp .LBB2_2
+; CHECK32-NEXT: jne .LBB8_2
+; CHECK32-NEXT: jp .LBB8_2
; CHECK32-NEXT: # %bb.1: # %true
; CHECK32-NEXT: calll foo@PLT
-; CHECK32-NEXT: .LBB2_2: # %false
+; CHECK32-NEXT: .LBB8_2: # %false
; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isint_branch:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512-NODQ-NEXT: vucomisd %xmm1, %xmm0
+; AVX512-NODQ-NEXT: jne .LBB8_2
+; AVX512-NODQ-NEXT: jp .LBB8_2
+; AVX512-NODQ-NEXT: # %bb.1: # %true
+; AVX512-NODQ-NEXT: pushq %rax
+; AVX512-NODQ-NEXT: callq foo@PLT
+; AVX512-NODQ-NEXT: popq %rax
+; AVX512-NODQ-NEXT: .LBB8_2: # %false
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isint_branch:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512VL-NEXT: vucomisd %xmm1, %xmm0
+; AVX512VL-NEXT: jne .LBB8_2
+; AVX512VL-NEXT: jp .LBB8_2
+; AVX512VL-NEXT: # %bb.1: # %true
+; AVX512VL-NEXT: pushq %rax
+; AVX512VL-NEXT: callq foo@PLT
+; AVX512VL-NEXT: popq %rax
+; AVX512VL-NEXT: .LBB8_2: # %false
+; AVX512VL-NEXT: retq
%i = fptosi double %d to i32
%e = sitofp i32 %i to double
%c = fcmp oeq double %d, %e
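Between the two patches, a quick summary of what the new tests exercise: every function is the same "is this value an exact integer" idiom from PR19059, i.e. convert to the integer type, convert back, and compare for exact equality. A C++ sketch of the source pattern (assumed equivalent, not taken from the PR):

#include <cstdint>

// fptosi double -> i32, sitofp back, fcmp oeq (isint_return)
bool isint(double d) {
  return d == static_cast<double>(static_cast<int32_t>(d));
}

// fptoui float -> i64, uitofp back, fcmp oeq (isuint64_float_return)
bool isuint64_float(float f) {
  return f == static_cast<float>(static_cast<uint64_t>(f));
}

As in the IR, out-of-range inputs are left undefined; the tests only pin down instruction selection for each conversion pair.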
>From b7f1545f29a61ca9cd1b2bf78ba134329c2709af Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Thu, 9 Oct 2025 09:07:02 -0400
Subject: [PATCH 02/11] Formatted lowerFPToIntToFP using git clang-format
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 44 ++++++++++++++++---------
1 file changed, 28 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9e209405bf99e..946dbf8361aaf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19885,7 +19885,9 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
// TODO: Allow FP_TO_UINT.
SDValue CastToInt = CastToFP.getOperand(0);
MVT VT = CastToFP.getSimpleValueType();
- if ((CastToInt.getOpcode() != ISD::FP_TO_SINT && CastToInt.getOpcode() != ISD::FP_TO_UINT) || VT.isVector())
+ if ((CastToInt.getOpcode() != ISD::FP_TO_SINT &&
+ CastToInt.getOpcode() != ISD::FP_TO_UINT) ||
+ VT.isVector())
return SDValue();
MVT IntVT = CastToInt.getSimpleValueType();
@@ -19913,25 +19915,35 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
if (Subtarget.hasVLX()) {
if (IsUnsigned) {
- ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
- ToFPOpcode = IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
} else {
- ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
- ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
}
- } else {
+ } else {
// SSE2
- ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
- ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
}
} else {
if (Subtarget.hasVLX()) {
if (IsUnsigned) {
- ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
- ToFPOpcode = IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
} else {
- ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
- ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
}
} else {
// Need to extend width for AVX512DQ
@@ -19941,19 +19953,19 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
}
}
- MVT VecSrcVT;
+ MVT VecSrcVT;
MVT VecIntVT;
MVT VecVT;
if (IntVT == MVT::i64) {
unsigned NumElts = Width / IntSize;
VecIntVT = MVT::getVectorVT(IntVT, NumElts);
-
+
// minimum legal size is v4f32
unsigned SrcElts = (SrcVT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
unsigned VTElts = (VT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
-
+
VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
- VecVT = MVT::getVectorVT(VT, VTElts);
+ VecVT = MVT::getVectorVT(VT, VTElts);
} else {
VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize);
VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize);
>From ba8fad5dbc83823aa99c9ef898246a2c7c0c9ef8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 10 Oct 2025 13:35:46 +0100
Subject: [PATCH 03/11] [X86] Add additional test coverage for #160111
---
llvm/test/CodeGen/X86/fp-int-fp-cvt.ll | 240 +++++++++++++++++++++++++
1 file changed, 240 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
new file mode 100644
index 0000000000000..b6c17cecffbd6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
@@ -0,0 +1,240 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512,AVX512-VL
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 -mattr=-avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512-NOVL
+
+;
+; fptosi -> sitofp
+;
+
+define double @scvtf64_i32(double %a0) {
+; SSE-LABEL: scvtf64_i32:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: scvtf64_i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT: retq
+ %ii = fptosi double %a0 to i32
+ %ff = sitofp i32 %ii to double
+ ret double %ff
+}
+
+define double @scvtf64_i64(double %a0) {
+; SSE-LABEL: scvtf64_i64:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2sd %rax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: scvtf64_i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
+; AVX-NEXT: retq
+ %ii = fptosi double %a0 to i64
+ %ff = sitofp i64 %ii to double
+ ret double %ff
+}
+
+define float @scvtf32_i32(float %a0) {
+; SSE-LABEL: scvtf32_i32:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: scvtf32_i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
+ %ii = fptosi float %a0 to i32
+ %ff = sitofp i32 %ii to float
+ ret float %ff
+}
+
+define float @scvtf32_i64(float %a0) {
+; SSE-LABEL: scvtf32_i64:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ss %rax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: scvtf32_i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; AVX-NEXT: retq
+ %ii = fptosi float %a0 to i64
+ %ff = sitofp i64 %ii to float
+ ret float %ff
+}
+
+;
+; fptoui -> uitofp
+;
+
+define double @ucvtf64_i32(double %a0) {
+; SSE-LABEL: ucvtf64_i32:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movl %eax, %eax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2sd %rax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: ucvtf64_i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttsd2si %xmm0, %rax
+; AVX2-NEXT: movl %eax, %eax
+; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ucvtf64_i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0
+; AVX512-NEXT: retq
+ %ii = fptoui double %a0 to i32
+ %ff = uitofp i32 %ii to double
+ ret double %ff
+}
+
+define double @ucvtf64_i64(double %a0) {
+; SSE-LABEL: ucvtf64_i64:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movq %rax, %rcx
+; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: sarq $63, %rcx
+; SSE-NEXT: andq %rcx, %rdx
+; SSE-NEXT: orq %rax, %rdx
+; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; SSE-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: addsd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: ucvtf64_i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttsd2si %xmm0, %rax
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vcvttsd2si %xmm0, %rdx
+; AVX2-NEXT: andq %rcx, %rdx
+; AVX2-NEXT: orq %rax, %rdx
+; AVX2-NEXT: vmovq %rdx, %xmm0
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX2-NEXT: vaddsd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ucvtf64_i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
+; AVX512-NEXT: retq
+ %ii = fptoui double %a0 to i64
+ %ff = uitofp i64 %ii to double
+ ret double %ff
+}
+
+define float @ucvtf32_i32(float %a0) {
+; SSE-LABEL: ucvtf32_i32:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movl %eax, %eax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ss %rax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: ucvtf32_i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttss2si %xmm0, %rax
+; AVX2-NEXT: movl %eax, %eax
+; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ucvtf32_i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
+; AVX512-NEXT: retq
+ %ii = fptoui float %a0 to i32
+ %ff = uitofp i32 %ii to float
+ ret float %ff
+}
+
+define float @ucvtf32_i64(float %a0) {
+; SSE-LABEL: ucvtf32_i64:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttss2si %xmm0, %rcx
+; SSE-NEXT: movq %rcx, %rdx
+; SSE-NEXT: sarq $63, %rdx
+; SSE-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: andq %rdx, %rax
+; SSE-NEXT: orq %rcx, %rax
+; SSE-NEXT: js .LBB7_1
+; SSE-NEXT: # %bb.2:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ss %rax, %xmm0
+; SSE-NEXT: retq
+; SSE-NEXT: .LBB7_1:
+; SSE-NEXT: movq %rax, %rcx
+; SSE-NEXT: shrq %rcx
+; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: orq %rcx, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ss %rax, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: ucvtf32_i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttss2si %xmm0, %rcx
+; AVX2-NEXT: movq %rcx, %rdx
+; AVX2-NEXT: sarq $63, %rdx
+; AVX2-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vcvttss2si %xmm0, %rax
+; AVX2-NEXT: andq %rdx, %rax
+; AVX2-NEXT: orq %rcx, %rax
+; AVX2-NEXT: js .LBB7_1
+; AVX2-NEXT: # %bb.2:
+; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; AVX2-NEXT: retq
+; AVX2-NEXT: .LBB7_1:
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq %rcx
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: orq %rcx, %rax
+; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ucvtf32_i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
+; AVX512-NEXT: retq
+ %ii = fptoui float %a0 to i64
+ %ff = uitofp i64 %ii to float
+ ret float %ff
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX512-NOVL: {{.*}}
+; AVX512-VL: {{.*}}
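One detail worth noting in the SSE/AVX2 CHECK lines for ucvtf32_i64 above: without AVX512 there is no direct u64 -> f32 instruction, so the backend emulates it. When the sign bit is set, the value is halved with the low bit folded back in (so the final rounding is still correct), converted as signed, and doubled. A standalone C++ sketch of that technique (my reading of the asm, not code from LLVM):

#include <cstdint>

float u64_to_f32(uint64_t x) {
  if (static_cast<int64_t>(x) >= 0) // fits the signed converter directly
    return static_cast<float>(static_cast<int64_t>(x));
  // Halve with the low bit or'd back in ("round to odd"), convert as
  // signed, then double -- the shrq / andl $1 / orq / cvtsi2ss / addss
  // sequence in the .LBB7_1 blocks above.
  uint64_t half = (x >> 1) | (x & 1);
  return 2.0f * static_cast<float>(static_cast<int64_t>(half));
}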
>From 8d084530d213bb126179c03793be1555083081f6 Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Sat, 11 Oct 2025 16:11:44 -0400
Subject: [PATCH 04/11] Added i64 handling in lowerFPToIntToFP & modified
associated test case
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 85 ++++++++++---------------
llvm/test/CodeGen/X86/fp-int-fp-cvt.ll | 53 +++++++++++----
2 files changed, 72 insertions(+), 66 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e258f2793f66d..2813991d9c1aa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19934,68 +19934,47 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
unsigned Width = 128;
bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
- if (IntVT == MVT::i32) {
- if (IsUnsigned && !Subtarget.hasVLX())
- return SDValue(); // Need AVX512VL for unsigned i32
-
- if (Subtarget.hasVLX()) {
- if (IsUnsigned) {
- ToIntOpcode =
- SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
- ToFPOpcode =
- IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
- } else {
- ToIntOpcode =
- SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
- ToFPOpcode =
- IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
- }
+ if (Subtarget.hasVLX() && IntVT == MVT::i64) {
+ // AVX512DQ+VLX
+ if (IsUnsigned) {
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
} else {
- // SSE2
ToIntOpcode =
SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
ToFPOpcode =
IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
}
+ } else if (IntVT == MVT::i64) {
+ // Need to extend width for AVX512DQ without AVX512VL
+ Width = 512;
+ ToIntOpcode = CastToInt.getOpcode();
+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
} else {
- if (Subtarget.hasVLX()) {
- if (IsUnsigned) {
- ToIntOpcode =
- SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
- ToFPOpcode =
- IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
- } else {
- ToIntOpcode =
- SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
- ToFPOpcode =
- IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
- }
- } else {
- // Need to extend width for AVX512DQ
- Width = 512;
- ToIntOpcode = CastToInt.getOpcode();
- ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
- }
- }
-
- MVT VecSrcVT;
- MVT VecIntVT;
- MVT VecVT;
- if (IntVT == MVT::i64) {
- unsigned NumElts = Width / IntSize;
- VecIntVT = MVT::getVectorVT(IntVT, NumElts);
-
- // minimum legal size is v4f32
- unsigned SrcElts = (SrcVT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
- unsigned VTElts = (VT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
-
- VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
- VecVT = MVT::getVectorVT(VT, VTElts);
+ // SSE2
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ }
+
+ MVT VecSrcVT, VecIntVT, VecVT;
+ unsigned NumElts = Width / IntSize;
+ VecIntVT = MVT::getVectorVT(IntVT, NumElts);
+ unsigned SrcElts, VTElts;
+ // vcvttps2qq cannot convert v16f32 <-> v8i64
+ if (IntVT == MVT::i64 && Width == 512) {
+ SrcElts = NumElts;
+ VTElts = NumElts;
} else {
- VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize);
- VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize);
- VecVT = MVT::getVectorVT(VT, Width / VTSize);
+ SrcElts = Width / SrcSize;
+ VTElts = Width / VTSize;
}
+
+ VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
+ VecVT = MVT::getVectorVT(VT, VTElts);
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
//
// We are not defining the high elements (for example, zero them) because
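A note on the element-count computation in the reworked hunk (reviewer's sketch; lanes is a hypothetical helper, not LLVM API): the integer vector fixes the lane count, and on the widened 512-bit i64 path the f32 side follows that lane count -- vcvttps2qq/vcvtqq2ps map v8f32 <-> v8i64 -- rather than filling the whole register:

#include <cstdio>

void lanes(unsigned Width, unsigned SrcBits, unsigned IntBits,
           unsigned DstBits) {
  unsigned NumElts = Width / IntBits; // lanes in the integer vector
  bool WidenedI64 = (IntBits == 64 && Width == 512);
  unsigned SrcElts = WidenedI64 ? NumElts : Width / SrcBits;
  unsigned DstElts = WidenedI64 ? NumElts : Width / DstBits;
  std::printf("src v%u, int v%u, dst v%u\n", SrcElts, NumElts, DstElts);
}

int main() {
  lanes(512, 32, 64, 32); // f32->i64->f32, DQ without VL: v8f32, v8i64, v8f32
  lanes(128, 64, 32, 64); // f64->i32->f64 on SSE2:        v2f64, v4i32, v2f64
}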
diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
index b6c17cecffbd6..c0a9c6113b9e8 100644
--- a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
+++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
@@ -34,11 +34,26 @@ define double @scvtf64_i64(double %a0) {
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: scvtf64_i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vcvttsd2si %xmm0, %rax
-; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: scvtf64_i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttsd2si %xmm0, %rax
+; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-VL-LABEL: scvtf64_i64:
+; AVX512-VL: # %bb.0:
+; AVX512-VL-NEXT: vcvttpd2qq %xmm0, %xmm0
+; AVX512-VL-NEXT: vcvtqq2pd %xmm0, %xmm0
+; AVX512-VL-NEXT: retq
+;
+; AVX512-NOVL-LABEL: scvtf64_i64:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NOVL-NEXT: vcvttpd2qq %zmm0, %zmm0
+; AVX512-NOVL-NEXT: vcvtqq2pd %zmm0, %zmm0
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NOVL-NEXT: vzeroupper
+; AVX512-NOVL-NEXT: retq
%ii = fptosi double %a0 to i64
%ff = sitofp i64 %ii to double
ret double %ff
@@ -69,11 +84,26 @@ define float @scvtf32_i64(float %a0) {
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: scvtf32_i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vcvttss2si %xmm0, %rax
-; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: scvtf32_i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttss2si %xmm0, %rax
+; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-VL-LABEL: scvtf32_i64:
+; AVX512-VL: # %bb.0:
+; AVX512-VL-NEXT: vcvttps2qq %xmm0, %xmm0
+; AVX512-VL-NEXT: vcvtqq2ps %xmm0, %xmm0
+; AVX512-VL-NEXT: retq
+;
+; AVX512-NOVL-LABEL: scvtf32_i64:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512-NOVL-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512-NOVL-NEXT: vcvtqq2ps %zmm0, %ymm0
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NOVL-NEXT: vzeroupper
+; AVX512-NOVL-NEXT: retq
%ii = fptosi float %a0 to i64
%ff = sitofp i64 %ii to float
ret float %ff
@@ -235,6 +265,3 @@ define float @ucvtf32_i64(float %a0) {
%ff = uitofp i64 %ii to float
ret float %ff
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX512-NOVL: {{.*}}
-; AVX512-VL: {{.*}}
>From a4aae578510be0764980645336894777a791b657 Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Sat, 11 Oct 2025 16:12:38 -0400
Subject: [PATCH 05/11] Updated prefix in isint.ll
---
llvm/test/CodeGen/X86/isint.ll | 655 ++++++++++++++++-----------------
1 file changed, 317 insertions(+), 338 deletions(-)
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
index f679821622cd9..691a56197ca32 100644
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s
-; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s
-; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f | FileCheck -check-prefix=AVX512-NODQ %s
-; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq | FileCheck -check-prefix=AVX512-NODQ %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X86 %s
+; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=SSE2 %s
; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck -check-prefix=AVX512VL %s
; PR19059
@@ -17,23 +15,25 @@ define i32 @isint_return(double %d) nounwind {
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
;
-; CHECK32-LABEL: isint_return:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
-; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
-; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
-; CHECK32-NEXT: movd %xmm1, %eax
-; CHECK32-NEXT: andl $1, %eax
-; CHECK32-NEXT: retl
+; X86-LABEL: isint_return:
+; X86: # %bb.0:
+; X86-NEXT: cvttpd2dq %xmm0, %xmm1
+; X86-NEXT: cvtdq2pd %xmm1, %xmm1
+; X86-NEXT: cmpeqsd %xmm0, %xmm1
+; X86-NEXT: movq %xmm1, %rax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: # kill: def $eax killed $eax killed $rax
+; X86-NEXT: retq
;
-; AVX512-NODQ-LABEL: isint_return:
-; AVX512-NODQ: # %bb.0:
-; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1
-; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1
-; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
-; AVX512-NODQ-NEXT: kmovw %k0, %eax
-; AVX512-NODQ-NEXT: retq
+; SSE2-LABEL: isint_return:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: cvttpd2dq %xmm0, %xmm1
+; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE2-NEXT: cmpeqsd %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: retl
;
; AVX512VL-LABEL: isint_return:
; AVX512VL: # %bb.0:
@@ -59,23 +59,24 @@ define i32 @isint_float_return(float %f) nounwind {
; X64-NEXT: andl $1, %eax
; X64-NEXT: retq
;
-; CHECK32-LABEL: isint_float_return:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK32-NEXT: cvttps2dq %xmm0, %xmm1
-; CHECK32-NEXT: cvtdq2ps %xmm1, %xmm1
-; CHECK32-NEXT: cmpeqss %xmm0, %xmm1
-; CHECK32-NEXT: movd %xmm1, %eax
-; CHECK32-NEXT: andl $1, %eax
-; CHECK32-NEXT: retl
+; X86-LABEL: isint_float_return:
+; X86: # %bb.0:
+; X86-NEXT: cvttps2dq %xmm0, %xmm1
+; X86-NEXT: cvtdq2ps %xmm1, %xmm1
+; X86-NEXT: cmpeqss %xmm0, %xmm1
+; X86-NEXT: movd %xmm1, %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: retq
;
-; AVX512-NODQ-LABEL: isint_float_return:
-; AVX512-NODQ: # %bb.0:
-; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1
-; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1
-; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0
-; AVX512-NODQ-NEXT: kmovw %k0, %eax
-; AVX512-NODQ-NEXT: retq
+; SSE2-LABEL: isint_float_return:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: cvttps2dq %xmm0, %xmm1
+; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
+; SSE2-NEXT: cmpeqss %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: retl
;
; AVX512VL-LABEL: isint_float_return:
; AVX512VL: # %bb.0:
@@ -92,42 +93,42 @@ define i32 @isint_float_return(float %f) nounwind {
}
define i64 @isint64_float_return(float %f) nounwind {
-; CHECK64-LABEL: isint64_float_return:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttss2si %xmm0, %rax
-; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK64-NEXT: cmpeqss %xmm0, %xmm1
-; CHECK64-NEXT: movd %xmm1, %eax
-; CHECK64-NEXT: andl $1, %eax
-; CHECK64-NEXT: retq
+; X86-LABEL: isint64_float_return:
+; X86: # %bb.0:
+; X86-NEXT: cvttss2si %xmm0, %rax
+; X86-NEXT: cvtsi2ss %rax, %xmm1
+; X86-NEXT: cmpeqss %xmm0, %xmm1
+; X86-NEXT: movd %xmm1, %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: retq
;
-; CHECK32-LABEL: isint64_float_return:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: pushl %ebp
-; CHECK32-NEXT: movl %esp, %ebp
-; CHECK32-NEXT: andl $-8, %esp
-; CHECK32-NEXT: subl $32, %esp
-; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00
-; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fstps {{[0-9]+}}(%esp)
-; CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0
-; CHECK32-NEXT: movd %xmm0, %eax
-; CHECK32-NEXT: andl $1, %eax
-; CHECK32-NEXT: xorl %edx, %edx
-; CHECK32-NEXT: movl %ebp, %esp
-; CHECK32-NEXT: popl %ebp
-; CHECK32-NEXT: retl
+; SSE2-LABEL: isint64_float_return:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pushl %ebp
+; SSE2-NEXT: movl %esp, %ebp
+; SSE2-NEXT: andl $-8, %esp
+; SSE2-NEXT: subl $32, %esp
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE2-NEXT: flds {{[0-9]+}}(%esp)
+; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; SSE2-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE2-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
+; SSE2-NEXT: fildll {{[0-9]+}}(%esp)
+; SSE2-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE2-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: movl %ebp, %esp
+; SSE2-NEXT: popl %ebp
+; SSE2-NEXT: retl
;
; AVX512VL-LABEL: isint64_float_return:
; AVX512VL: # %bb.0:
@@ -144,42 +145,42 @@ define i64 @isint64_float_return(float %f) nounwind {
}
define i64 @isint64_return(double %d) nounwind {
-; CHECK64-LABEL: isint64_return:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttsd2si %xmm0, %rax
-; CHECK64-NEXT: cvtsi2sd %rax, %xmm1
-; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
-; CHECK64-NEXT: movq %xmm1, %rax
-; CHECK64-NEXT: andl $1, %eax
-; CHECK64-NEXT: retq
+; X86-LABEL: isint64_return:
+; X86: # %bb.0:
+; X86-NEXT: cvttsd2si %xmm0, %rax
+; X86-NEXT: cvtsi2sd %rax, %xmm1
+; X86-NEXT: cmpeqsd %xmm0, %xmm1
+; X86-NEXT: movq %xmm1, %rax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: retq
;
-; CHECK32-LABEL: isint64_return:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: pushl %ebp
-; CHECK32-NEXT: movl %esp, %ebp
-; CHECK32-NEXT: andl $-8, %esp
-; CHECK32-NEXT: subl $32, %esp
-; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldl {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00
-; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fstpl {{[0-9]+}}(%esp)
-; CHECK32-NEXT: cmpeqsd {{[0-9]+}}(%esp), %xmm0
-; CHECK32-NEXT: movd %xmm0, %eax
-; CHECK32-NEXT: andl $1, %eax
-; CHECK32-NEXT: xorl %edx, %edx
-; CHECK32-NEXT: movl %ebp, %esp
-; CHECK32-NEXT: popl %ebp
-; CHECK32-NEXT: retl
+; SSE2-LABEL: isint64_return:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pushl %ebp
+; SSE2-NEXT: movl %esp, %ebp
+; SSE2-NEXT: andl $-8, %esp
+; SSE2-NEXT: subl $32, %esp
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; SSE2-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE2-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
+; SSE2-NEXT: fildll {{[0-9]+}}(%esp)
+; SSE2-NEXT: fstpl {{[0-9]+}}(%esp)
+; SSE2-NEXT: cmpeqsd {{[0-9]+}}(%esp), %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: movl %ebp, %esp
+; SSE2-NEXT: popl %ebp
+; SSE2-NEXT: retl
;
; AVX512VL-LABEL: isint64_return:
; AVX512VL: # %bb.0:
@@ -196,36 +197,6 @@ define i64 @isint64_return(double %d) nounwind {
}
define i32 @isuint_return(double %d) nounwind {
-; CHECK64-LABEL: isuint_return:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttsd2si %xmm0, %rax
-; CHECK64-NEXT: movl %eax, %eax
-; CHECK64-NEXT: cvtsi2sd %rax, %xmm1
-; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
-; CHECK64-NEXT: movq %xmm1, %rax
-; CHECK64-NEXT: andl $1, %eax
-; CHECK64-NEXT: # kill: def $eax killed $eax killed $rax
-; CHECK64-NEXT: retq
-;
-; CHECK32-LABEL: isuint_return:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT: cvttsd2si %xmm0, %eax
-; CHECK32-NEXT: movl %eax, %ecx
-; CHECK32-NEXT: sarl $31, %ecx
-; CHECK32-NEXT: movapd %xmm0, %xmm1
-; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; CHECK32-NEXT: cvttsd2si %xmm1, %edx
-; CHECK32-NEXT: andl %ecx, %edx
-; CHECK32-NEXT: orl %eax, %edx
-; CHECK32-NEXT: movd %edx, %xmm1
-; CHECK32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
-; CHECK32-NEXT: movd %xmm1, %eax
-; CHECK32-NEXT: andl $1, %eax
-; CHECK32-NEXT: retl
-;
; AVX512-NODQ-LABEL: isuint_return:
; AVX512-NODQ: # %bb.0:
; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %eax
@@ -234,6 +205,36 @@ define i32 @isuint_return(double %d) nounwind {
; AVX512-NODQ-NEXT: kmovw %k0, %eax
; AVX512-NODQ-NEXT: retq
;
+; X86-LABEL: isuint_return:
+; X86: # %bb.0:
+; X86-NEXT: cvttsd2si %xmm0, %rax
+; X86-NEXT: movl %eax, %eax
+; X86-NEXT: cvtsi2sd %rax, %xmm1
+; X86-NEXT: cmpeqsd %xmm0, %xmm1
+; X86-NEXT: movq %xmm1, %rax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: # kill: def $eax killed $eax killed $rax
+; X86-NEXT: retq
+;
+; SSE2-LABEL: isuint_return:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: cvttsd2si %xmm0, %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: sarl $31, %ecx
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE2-NEXT: cvttsd2si %xmm1, %edx
+; SSE2-NEXT: andl %ecx, %edx
+; SSE2-NEXT: orl %eax, %edx
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE2-NEXT: cmpeqsd %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: retl
+;
; AVX512VL-LABEL: isuint_return:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax
@@ -249,25 +250,6 @@ define i32 @isuint_return(double %d) nounwind {
}
define i32 @isuint_float_return(float %f) nounwind {
-; CHECK64-LABEL: isuint_float_return:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttps2dq %xmm0, %xmm1
-; CHECK64-NEXT: cvtdq2ps %xmm1, %xmm1
-; CHECK64-NEXT: cmpeqss %xmm0, %xmm1
-; CHECK64-NEXT: movd %xmm1, %eax
-; CHECK64-NEXT: andl $1, %eax
-; CHECK64-NEXT: retq
-;
-; CHECK32-LABEL: isuint_float_return:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK32-NEXT: cvttps2dq %xmm0, %xmm1
-; CHECK32-NEXT: cvtdq2ps %xmm1, %xmm1
-; CHECK32-NEXT: cmpeqss %xmm0, %xmm1
-; CHECK32-NEXT: movd %xmm1, %eax
-; CHECK32-NEXT: andl $1, %eax
-; CHECK32-NEXT: retl
-;
; AVX512-NODQ-LABEL: isuint_float_return:
; AVX512-NODQ: # %bb.0:
; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1
@@ -276,6 +258,25 @@ define i32 @isuint_float_return(float %f) nounwind {
; AVX512-NODQ-NEXT: kmovw %k0, %eax
; AVX512-NODQ-NEXT: retq
;
+; X86-LABEL: isuint_float_return:
+; X86: # %bb.0:
+; X86-NEXT: cvttps2dq %xmm0, %xmm1
+; X86-NEXT: cvtdq2ps %xmm1, %xmm1
+; X86-NEXT: cmpeqss %xmm0, %xmm1
+; X86-NEXT: movd %xmm1, %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: retq
+;
+; SSE2-LABEL: isuint_float_return:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: cvttps2dq %xmm0, %xmm1
+; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
+; SSE2-NEXT: cmpeqss %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: retl
+;
; AVX512VL-LABEL: isuint_float_return:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1
@@ -291,71 +292,6 @@ define i32 @isuint_float_return(float %f) nounwind {
}
define i64 @isuint64_return(double %d) nounwind {
-; CHECK64-LABEL: isuint64_return:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttsd2si %xmm0, %rax
-; CHECK64-NEXT: movq %rax, %rcx
-; CHECK64-NEXT: sarq $63, %rcx
-; CHECK64-NEXT: movapd %xmm0, %xmm1
-; CHECK64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK64-NEXT: cvttsd2si %xmm1, %rdx
-; CHECK64-NEXT: andq %rcx, %rdx
-; CHECK64-NEXT: orq %rax, %rdx
-; CHECK64-NEXT: movq %rdx, %xmm1
-; CHECK64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
-; CHECK64-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK64-NEXT: movapd %xmm1, %xmm2
-; CHECK64-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
-; CHECK64-NEXT: addsd %xmm1, %xmm2
-; CHECK64-NEXT: cmpeqsd %xmm0, %xmm2
-; CHECK64-NEXT: movq %xmm2, %rax
-; CHECK64-NEXT: andl $1, %eax
-; CHECK64-NEXT: retq
-;
-; CHECK32-LABEL: isuint64_return:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: pushl %ebp
-; CHECK32-NEXT: movl %esp, %ebp
-; CHECK32-NEXT: andl $-8, %esp
-; CHECK32-NEXT: subl $16, %esp
-; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
-; CHECK32-NEXT: ucomisd %xmm0, %xmm1
-; CHECK32-NEXT: jbe .LBB6_2
-; CHECK32-NEXT: # %bb.1:
-; CHECK32-NEXT: xorpd %xmm1, %xmm1
-; CHECK32-NEXT: .LBB6_2:
-; CHECK32-NEXT: movapd %xmm0, %xmm2
-; CHECK32-NEXT: subsd %xmm1, %xmm2
-; CHECK32-NEXT: movsd %xmm2, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: setbe %al
-; CHECK32-NEXT: fldl {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00
-; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: movzbl %al, %eax
-; CHECK32-NEXT: shll $31, %eax
-; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movd %eax, %xmm1
-; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
-; CHECK32-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
-; CHECK32-NEXT: movapd %xmm2, %xmm1
-; CHECK32-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
-; CHECK32-NEXT: addsd %xmm2, %xmm1
-; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
-; CHECK32-NEXT: movd %xmm1, %eax
-; CHECK32-NEXT: andl $1, %eax
-; CHECK32-NEXT: xorl %edx, %edx
-; CHECK32-NEXT: movl %ebp, %esp
-; CHECK32-NEXT: popl %ebp
-; CHECK32-NEXT: retl
-;
; AVX512-NODQ-LABEL: isuint64_return:
; AVX512-NODQ: # %bb.0:
; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %rax
@@ -364,6 +300,71 @@ define i64 @isuint64_return(double %d) nounwind {
; AVX512-NODQ-NEXT: kmovw %k0, %eax
; AVX512-NODQ-NEXT: retq
;
+; X86-LABEL: isuint64_return:
+; X86: # %bb.0:
+; X86-NEXT: cvttsd2si %xmm0, %rax
+; X86-NEXT: movq %rax, %rcx
+; X86-NEXT: sarq $63, %rcx
+; X86-NEXT: movapd %xmm0, %xmm1
+; X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X86-NEXT: cvttsd2si %xmm1, %rdx
+; X86-NEXT: andq %rcx, %rdx
+; X86-NEXT: orq %rax, %rdx
+; X86-NEXT: movq %rdx, %xmm1
+; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; X86-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X86-NEXT: movapd %xmm1, %xmm2
+; X86-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; X86-NEXT: addsd %xmm1, %xmm2
+; X86-NEXT: cmpeqsd %xmm0, %xmm2
+; X86-NEXT: movq %xmm2, %rax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: retq
+;
+; SSE2-LABEL: isuint64_return:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pushl %ebp
+; SSE2-NEXT: movl %esp, %ebp
+; SSE2-NEXT: andl $-8, %esp
+; SSE2-NEXT: subl $16, %esp
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
+; SSE2-NEXT: ucomisd %xmm0, %xmm1
+; SSE2-NEXT: jbe .LBB6_2
+; SSE2-NEXT: # %bb.1:
+; SSE2-NEXT: xorpd %xmm1, %xmm1
+; SSE2-NEXT: .LBB6_2:
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: subsd %xmm1, %xmm2
+; SSE2-NEXT: movsd %xmm2, {{[0-9]+}}(%esp)
+; SSE2-NEXT: setbe %al
+; SSE2-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE2-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE2-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: shll $31, %eax
+; SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; SSE2-NEXT: movapd %xmm2, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
+; SSE2-NEXT: addsd %xmm2, %xmm1
+; SSE2-NEXT: cmpeqsd %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: movl %ebp, %esp
+; SSE2-NEXT: popl %ebp
+; SSE2-NEXT: retl
+;
; AVX512VL-LABEL: isuint64_return:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
@@ -379,86 +380,78 @@ define i64 @isuint64_return(double %d) nounwind {
}
define i64 @isuint64_float_return(float %f) nounwind {
-; CHECK64-LABEL: isuint64_float_return:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttss2si %xmm0, %rcx
-; CHECK64-NEXT: movq %rcx, %rdx
-; CHECK64-NEXT: sarq $63, %rdx
-; CHECK64-NEXT: movaps %xmm0, %xmm1
-; CHECK64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK64-NEXT: cvttss2si %xmm1, %rax
-; CHECK64-NEXT: andq %rdx, %rax
-; CHECK64-NEXT: orq %rcx, %rax
-; CHECK64-NEXT: js .LBB7_1
-; CHECK64-NEXT: # %bb.2:
-; CHECK64-NEXT: xorps %xmm1, %xmm1
-; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK64-NEXT: jmp .LBB7_3
-; CHECK64-NEXT: .LBB7_1:
-; CHECK64-NEXT: movq %rax, %rcx
-; CHECK64-NEXT: shrq %rcx
-; CHECK64-NEXT: andl $1, %eax
-; CHECK64-NEXT: orq %rcx, %rax
-; CHECK64-NEXT: xorps %xmm1, %xmm1
-; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK64-NEXT: addss %xmm1, %xmm1
-; CHECK64-NEXT: .LBB7_3:
-; CHECK64-NEXT: cmpeqss %xmm1, %xmm0
-; CHECK64-NEXT: movd %xmm0, %eax
-; CHECK64-NEXT: andl $1, %eax
-; CHECK64-NEXT: retq
+; X86-LABEL: isuint64_float_return:
+; X86: # %bb.0:
+; X86-NEXT: cvttss2si %xmm0, %rcx
+; X86-NEXT: movq %rcx, %rdx
+; X86-NEXT: sarq $63, %rdx
+; X86-NEXT: movaps %xmm0, %xmm1
+; X86-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X86-NEXT: cvttss2si %xmm1, %rax
+; X86-NEXT: andq %rdx, %rax
+; X86-NEXT: orq %rcx, %rax
+; X86-NEXT: js .LBB7_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: xorps %xmm1, %xmm1
+; X86-NEXT: cvtsi2ss %rax, %xmm1
+; X86-NEXT: jmp .LBB7_3
+; X86-NEXT: .LBB7_1:
+; X86-NEXT: movq %rax, %rcx
+; X86-NEXT: shrq %rcx
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: orq %rcx, %rax
+; X86-NEXT: xorps %xmm1, %xmm1
+; X86-NEXT: cvtsi2ss %rax, %xmm1
+; X86-NEXT: addss %xmm1, %xmm1
+; X86-NEXT: .LBB7_3:
+; X86-NEXT: cmpeqss %xmm1, %xmm0
+; X86-NEXT: movd %xmm0, %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: retq
;
-; CHECK32-LABEL: isuint64_float_return:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: pushl %ebp
-; CHECK32-NEXT: movl %esp, %ebp
-; CHECK32-NEXT: andl $-8, %esp
-; CHECK32-NEXT: subl $32, %esp
-; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK32-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
-; CHECK32-NEXT: ucomiss %xmm0, %xmm1
-; CHECK32-NEXT: jbe .LBB7_2
-; CHECK32-NEXT: # %bb.1:
-; CHECK32-NEXT: xorps %xmm1, %xmm1
-; CHECK32-NEXT: .LBB7_2:
-; CHECK32-NEXT: movaps %xmm0, %xmm2
-; CHECK32-NEXT: subss %xmm1, %xmm2
-; CHECK32-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: setbe %al
-; CHECK32-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00
-; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
-; CHECK32-NEXT: movzbl %al, %eax
-; CHECK32-NEXT: shll $31, %eax
-; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movd %eax, %xmm1
-; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; CHECK32-NEXT: movq %xmm2, {{[0-9]+}}(%esp)
-; CHECK32-NEXT: shrl $31, %eax
-; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
-; CHECK32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
-; CHECK32-NEXT: fstps {{[0-9]+}}(%esp)
-; CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0
-; CHECK32-NEXT: movd %xmm0, %eax
-; CHECK32-NEXT: andl $1, %eax
-; CHECK32-NEXT: xorl %edx, %edx
-; CHECK32-NEXT: movl %ebp, %esp
-; CHECK32-NEXT: popl %ebp
-; CHECK32-NEXT: retl
-;
-; AVX512-NODQ-LABEL: isuint64_float_return:
-; AVX512-NODQ: # %bb.0:
-; AVX512-NODQ-NEXT: vcvttss2usi %xmm0, %rax
-; AVX512-NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
-; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0
-; AVX512-NODQ-NEXT: kmovw %k0, %eax
-; AVX512-NODQ-NEXT: retq
+; SSE2-LABEL: isuint64_float_return:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pushl %ebp
+; SSE2-NEXT: movl %esp, %ebp
+; SSE2-NEXT: andl $-8, %esp
+; SSE2-NEXT: subl $32, %esp
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
+; SSE2-NEXT: ucomiss %xmm0, %xmm1
+; SSE2-NEXT: jbe .LBB7_2
+; SSE2-NEXT: # %bb.1:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: .LBB7_2:
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: subss %xmm1, %xmm2
+; SSE2-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
+; SSE2-NEXT: setbe %al
+; SSE2-NEXT: flds {{[0-9]+}}(%esp)
+; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE2-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE2-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE2-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: shll $31, %eax
+; SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movq %xmm2, {{[0-9]+}}(%esp)
+; SSE2-NEXT: shrl $31, %eax
+; SSE2-NEXT: fildll {{[0-9]+}}(%esp)
+; SSE2-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
+; SSE2-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE2-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: movl %ebp, %esp
+; SSE2-NEXT: popl %ebp
+; SSE2-NEXT: retl
;
; AVX512VL-LABEL: isuint64_float_return:
; AVX512VL: # %bb.0:
@@ -477,46 +470,32 @@ define i64 @isuint64_float_return(float %f) nounwind {
declare void @foo()
define void @isint_branch(double %d) nounwind {
-; CHECK64-LABEL: isint_branch:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
-; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
-; CHECK64-NEXT: ucomisd %xmm1, %xmm0
-; CHECK64-NEXT: jne .LBB8_2
-; CHECK64-NEXT: jp .LBB8_2
-; CHECK64-NEXT: # %bb.1: # %true
-; CHECK64-NEXT: pushq %rax
-; CHECK64-NEXT: callq foo at PLT
-; CHECK64-NEXT: popq %rax
-; CHECK64-NEXT: .LBB8_2: # %false
-; CHECK64-NEXT: retq
+; X86-LABEL: isint_branch:
+; X86: # %bb.0:
+; X86-NEXT: cvttpd2dq %xmm0, %xmm1
+; X86-NEXT: cvtdq2pd %xmm1, %xmm1
+; X86-NEXT: ucomisd %xmm1, %xmm0
+; X86-NEXT: jne .LBB8_2
+; X86-NEXT: jp .LBB8_2
+; X86-NEXT: # %bb.1: # %true
+; X86-NEXT: pushq %rax
+; X86-NEXT: callq foo at PLT
+; X86-NEXT: popq %rax
+; X86-NEXT: .LBB8_2: # %false
+; X86-NEXT: retq
;
-; CHECK32-LABEL: isint_branch:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
-; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
-; CHECK32-NEXT: ucomisd %xmm1, %xmm0
-; CHECK32-NEXT: jne .LBB8_2
-; CHECK32-NEXT: jp .LBB8_2
-; CHECK32-NEXT: # %bb.1: # %true
-; CHECK32-NEXT: calll foo at PLT
-; CHECK32-NEXT: .LBB8_2: # %false
-; CHECK32-NEXT: retl
-;
-; AVX512-NODQ-LABEL: isint_branch:
-; AVX512-NODQ: # %bb.0:
-; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1
-; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1
-; AVX512-NODQ-NEXT: vucomisd %xmm1, %xmm0
-; AVX512-NODQ-NEXT: jne .LBB8_2
-; AVX512-NODQ-NEXT: jp .LBB8_2
-; AVX512-NODQ-NEXT: # %bb.1: # %true
-; AVX512-NODQ-NEXT: pushq %rax
-; AVX512-NODQ-NEXT: callq foo at PLT
-; AVX512-NODQ-NEXT: popq %rax
-; AVX512-NODQ-NEXT: .LBB8_2: # %false
-; AVX512-NODQ-NEXT: retq
+; SSE2-LABEL: isint_branch:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: cvttpd2dq %xmm0, %xmm1
+; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE2-NEXT: ucomisd %xmm1, %xmm0
+; SSE2-NEXT: jne .LBB8_2
+; SSE2-NEXT: jp .LBB8_2
+; SSE2-NEXT: # %bb.1: # %true
+; SSE2-NEXT: calll foo at PLT
+; SSE2-NEXT: .LBB8_2: # %false
+; SSE2-NEXT: retl
;
; AVX512VL-LABEL: isint_branch:
; AVX512VL: # %bb.0:
>From 4726c5dde09dcbcd4f5396155c82c54f2394f69f Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Sat, 11 Oct 2025 16:43:02 -0400
Subject: [PATCH 06/11] Formatted code with clang-format
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2813991d9c1aa..9824bdd101098 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19964,7 +19964,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
unsigned NumElts = Width / IntSize;
VecIntVT = MVT::getVectorVT(IntVT, NumElts);
unsigned SrcElts, VTElts;
- // vcvttps2qq cannot convert v16f32 <-> v8i64
+ // vcvttps2qq cannot convert v16f32 <-> v8i64
if (IntVT == MVT::i64 && Width == 512) {
SrcElts = NumElts;
VTElts = NumElts;
@@ -19972,7 +19972,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
SrcElts = Width / SrcSize;
VTElts = Width / VTSize;
}
-
+
VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
VecVT = MVT::getVectorVT(VT, VTElts);
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
>From 74a22ef4e0a4c59d6281209657d568d048b14408 Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Tue, 14 Oct 2025 17:37:13 -0400
Subject: [PATCH 07/11] Called lowerFPToIntToFP in LowerUINT_TO_FP and modified
the appropriate test cases.
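A minimal IR sketch of the round-trip pattern this hook now catches (the
function name is illustrative, not from the patch):

define double @uitofp_of_fptoui(double %d) {
; fptoui+uitofp round-trip: LowerUINT_TO_FP can now hand the
; UINT_TO_FP(FP_TO_UINT x) chain to lowerFPToIntToFP instead of
; matching only the signed variant.
  %i = fptoui double %d to i32
  %f = uitofp i32 %i to double
  ret double %f
}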
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 ++
llvm/test/CodeGen/X86/fp-int-fp-cvt.ll | 86 +++++++++++++------------
llvm/test/CodeGen/X86/ftrunc.ll | 6 +-
llvm/test/CodeGen/X86/isint.ll | 30 +++------
llvm/test/CodeGen/X86/setoeq.ll | 58 ++++-------------
5 files changed, 73 insertions(+), 112 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9824bdd101098..375bad9ea1de4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20645,6 +20645,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
else if (isLegalConversion(SrcVT, DstVT, false, Subtarget))
return Op;
+ if (!IsStrict) {
+ if (SDValue V = lowerFPToIntToFP(Op, dl, DAG, Subtarget))
+ return V;
+ }
+
if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, dl, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
index c0a9c6113b9e8..724a259c33a89 100644
--- a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
+++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
@@ -116,24 +116,15 @@ define float @scvtf32_i64(float %a0) {
define double @ucvtf64_i32(double %a0) {
; SSE-LABEL: ucvtf64_i32:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movl %eax, %eax
-; SSE-NEXT: xorps %xmm0, %xmm0
-; SSE-NEXT: cvtsi2sd %rax, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: ucvtf64_i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvttsd2si %xmm0, %rax
-; AVX2-NEXT: movl %eax, %eax
-; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ucvtf64_i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
-; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ucvtf64_i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT: retq
%ii = fptoui double %a0 to i32
%ff = uitofp i32 %ii to double
ret double %ff
@@ -173,11 +164,20 @@ define double @ucvtf64_i64(double %a0) {
; AVX2-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: ucvtf64_i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
-; AVX512-NEXT: retq
+; AVX512-VL-LABEL: ucvtf64_i64:
+; AVX512-VL: # %bb.0:
+; AVX512-VL-NEXT: vcvttpd2uqq %xmm0, %xmm0
+; AVX512-VL-NEXT: vcvtuqq2pd %xmm0, %xmm0
+; AVX512-VL-NEXT: retq
+;
+; AVX512-NOVL-LABEL: ucvtf64_i64:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NOVL-NEXT: vcvttpd2uqq %zmm0, %zmm0
+; AVX512-NOVL-NEXT: vcvtuqq2pd %zmm0, %zmm0
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NOVL-NEXT: vzeroupper
+; AVX512-NOVL-NEXT: retq
%ii = fptoui double %a0 to i64
%ff = uitofp i64 %ii to double
ret double %ff
@@ -186,24 +186,15 @@ define double @ucvtf64_i64(double %a0) {
define float @ucvtf32_i32(float %a0) {
; SSE-LABEL: ucvtf32_i32:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %rax
-; SSE-NEXT: movl %eax, %eax
-; SSE-NEXT: xorps %xmm0, %xmm0
-; SSE-NEXT: cvtsi2ss %rax, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: ucvtf32_i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvttss2si %xmm0, %rax
-; AVX2-NEXT: movl %eax, %eax
-; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ucvtf32_i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vcvttss2usi %xmm0, %eax
-; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ucvtf32_i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
%ii = fptoui float %a0 to i32
%ff = uitofp i32 %ii to float
ret float %ff
@@ -256,12 +247,23 @@ define float @ucvtf32_i64(float %a0) {
; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: ucvtf32_i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vcvttss2usi %xmm0, %rax
-; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
-; AVX512-NEXT: retq
+; AVX512-VL-LABEL: ucvtf32_i64:
+; AVX512-VL: # %bb.0:
+; AVX512-VL-NEXT: vcvttps2uqq %xmm0, %xmm0
+; AVX512-VL-NEXT: vcvtuqq2ps %xmm0, %xmm0
+; AVX512-VL-NEXT: retq
+;
+; AVX512-NOVL-LABEL: ucvtf32_i64:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512-NOVL-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512-NOVL-NEXT: vcvtuqq2ps %zmm0, %ymm0
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NOVL-NEXT: vzeroupper
+; AVX512-NOVL-NEXT: retq
%ii = fptoui float %a0 to i64
%ff = uitofp i64 %ii to float
ret float %ff
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX512: {{.*}}
diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll
index 9095fb1550e70..c608c70f813c2 100644
--- a/llvm/test/CodeGen/X86/ftrunc.ll
+++ b/llvm/test/CodeGen/X86/ftrunc.ll
@@ -10,10 +10,8 @@ declare i64 @llvm.fptosi.sat.i64.f64(double)
define float @trunc_unsigned_f32(float %x) #0 {
; SSE2-LABEL: trunc_unsigned_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: movl %eax, %eax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_unsigned_f32:
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
index 691a56197ca32..30262f93501e6 100644
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -207,9 +207,8 @@ define i32 @isuint_return(double %d) nounwind {
;
; X86-LABEL: isuint_return:
; X86: # %bb.0:
-; X86-NEXT: cvttsd2si %xmm0, %rax
-; X86-NEXT: movl %eax, %eax
-; X86-NEXT: cvtsi2sd %rax, %xmm1
+; X86-NEXT: cvttpd2dq %xmm0, %xmm1
+; X86-NEXT: cvtdq2pd %xmm1, %xmm1
; X86-NEXT: cmpeqsd %xmm0, %xmm1
; X86-NEXT: movq %xmm1, %rax
; X86-NEXT: andl $1, %eax
@@ -219,17 +218,8 @@ define i32 @isuint_return(double %d) nounwind {
; SSE2-LABEL: isuint_return:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: cvttsd2si %xmm0, %eax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: sarl $31, %ecx
-; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; SSE2-NEXT: cvttsd2si %xmm1, %edx
-; SSE2-NEXT: andl %ecx, %edx
-; SSE2-NEXT: orl %eax, %edx
-; SSE2-NEXT: movd %edx, %xmm1
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE2-NEXT: cvttpd2dq %xmm0, %xmm1
+; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE2-NEXT: cmpeqsd %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $1, %eax
@@ -237,8 +227,8 @@ define i32 @isuint_return(double %d) nounwind {
;
; AVX512VL-LABEL: isuint_return:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax
-; AVX512VL-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: retq
@@ -367,8 +357,8 @@ define i64 @isuint64_return(double %d) nounwind {
;
; AVX512VL-LABEL: isuint64_return:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512VL-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcvttpd2uqq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtuqq2pd %xmm1, %xmm1
; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: retq
@@ -455,8 +445,8 @@ define i64 @isuint64_float_return(float %f) nounwind {
;
; AVX512VL-LABEL: isuint64_float_return:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcvttps2uqq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtuqq2ps %xmm1, %xmm1
; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/setoeq.ll b/llvm/test/CodeGen/X86/setoeq.ll
index 131e279aa645c..10e2ace08a86a 100644
--- a/llvm/test/CodeGen/X86/setoeq.ll
+++ b/llvm/test/CodeGen/X86/setoeq.ll
@@ -47,17 +47,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone {
; SSE-LABEL: oeq_f64_u32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: sarl $31, %ecx
-; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; SSE-NEXT: cvttsd2si %xmm1, %edx
-; SSE-NEXT: andl %ecx, %edx
-; SSE-NEXT: orl %eax, %edx
-; SSE-NEXT: movd %edx, %xmm1
-; SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE-NEXT: cmpeqsd %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: andl $1, %eax
@@ -67,16 +58,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone {
; AVX-LABEL: oeq_f64_u32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vcvttsd2si %xmm0, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: sarl $31, %ecx
-; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
-; AVX-NEXT: vcvttsd2si %xmm1, %edx
-; AVX-NEXT: andl %ecx, %edx
-; AVX-NEXT: orl %eax, %edx
-; AVX-NEXT: vmovd %edx, %xmm1
-; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX-NEXT: vcmpeqsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: andl $1, %eax
@@ -86,8 +69,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone {
; AVX512-LABEL: oeq_f64_u32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
-; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1
+; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX512-NEXT: vcmpeqsd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
@@ -311,17 +294,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone {
; SSE-LABEL: une_f64_u32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: sarl $31, %ecx
-; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; SSE-NEXT: cvttsd2si %xmm1, %edx
-; SSE-NEXT: andl %ecx, %edx
-; SSE-NEXT: orl %eax, %edx
-; SSE-NEXT: movd %edx, %xmm1
-; SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE-NEXT: cmpneqsd %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: andl $1, %eax
@@ -331,16 +305,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone {
; AVX-LABEL: une_f64_u32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vcvttsd2si %xmm0, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: sarl $31, %ecx
-; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
-; AVX-NEXT: vcvttsd2si %xmm1, %edx
-; AVX-NEXT: andl %ecx, %edx
-; AVX-NEXT: orl %eax, %edx
-; AVX-NEXT: vmovd %edx, %xmm1
-; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX-NEXT: vcmpneqsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: andl $1, %eax
@@ -350,8 +316,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone {
; AVX512-LABEL: une_f64_u32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
-; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1
+; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX512-NEXT: vcmpneqsd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
>From 5632c0ce66a9f1b7d00bed0305a5af41cc24625d Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Tue, 14 Oct 2025 18:15:19 -0400
Subject: [PATCH 08/11] Removed unnecessary IsStrict condition
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 375bad9ea1de4..1bb000ad51135 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20645,10 +20645,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
else if (isLegalConversion(SrcVT, DstVT, false, Subtarget))
return Op;
- if (!IsStrict) {
- if (SDValue V = lowerFPToIntToFP(Op, dl, DAG, Subtarget))
- return V;
- }
+ if (SDValue V = lowerFPToIntToFP(Op, dl, DAG, Subtarget))
+ return V;
if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, dl, DAG, Subtarget);
>From b0a20acf87625030997a2defbbc5aa86ad7232a2 Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Tue, 14 Oct 2025 19:20:17 -0400
Subject: [PATCH 09/11] Modified fp-int-fp-cvt.ll
---
llvm/test/CodeGen/X86/fp-int-fp-cvt.ll | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
index 2bccfe47730cb..724a259c33a89 100644
--- a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
+++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
@@ -266,5 +266,4 @@ define float @ucvtf32_i64(float %a0) {
ret float %ff
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX512-NOVL: {{.*}}
-; AVX512-VL: {{.*}}
+; AVX512: {{.*}}
>From 1eb12e6a7e9b6bb425d069715a3ff187d80ef676 Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Wed, 15 Oct 2025 14:31:36 -0400
Subject: [PATCH 10/11] [X86] LowerFPToIntToFP: return SDValue() for unsigned
fp -> int in SSE2
SSE2 has no unsigned i32 <-> f32/f64 conversion instructions, so return SDValue() in this case and keep the scalar lowering. Updated the corresponding X86 fp/int conversion test cases (fp-int-fp-cvt.ll, ftrunc.ll, isint.ll, setoeq.ll) to reflect this change.
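As a sketch of why the bailout is needed (example value and function name are mine, not from the patch): for an input such as 3.0e9, the unsigned round-trip must survive the full u32 range, but the packed signed cvttps2dq would saturate it to the integer indefinite value 0x80000000, and cvtdq2ps would then yield -2147483648.0:

define float @u32_roundtrip_f32(float %f) {
; On plain SSE2 this must stay on the scalar unsigned lowering; the
; packed signed cvt pair mis-handles inputs in [2^31, 2^32).
  %i = fptoui float %f to i32
  %r = uitofp i32 %i to float
  ret float %r
}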
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +
llvm/test/CodeGen/X86/fp-int-fp-cvt.ll | 48 +++++++++++++-------
llvm/test/CodeGen/X86/ftrunc.ll | 6 ++-
llvm/test/CodeGen/X86/isint.ll | 22 +++++++---
llvm/test/CodeGen/X86/setoeq.ll | 58 ++++++++++++++++++++-----
5 files changed, 100 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f72cb664277d5..6091c0ecf9e2f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19963,6 +19963,8 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
} else {
// SSE2
+ if (IsUnsigned)
+ return SDValue();
ToIntOpcode =
SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
ToFPOpcode =
diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
index 724a259c33a89..ecddb2ad160be 100644
--- a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
+++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
@@ -116,15 +116,24 @@ define float @scvtf32_i64(float %a0) {
define double @ucvtf64_i32(double %a0) {
; SSE-LABEL: ucvtf64_i32:
; SSE: # %bb.0:
-; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movl %eax, %eax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: ucvtf64_i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: ucvtf64_i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttsd2si %xmm0, %rax
+; AVX2-NEXT: movl %eax, %eax
+; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ucvtf64_i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0
+; AVX512-NEXT: retq
%ii = fptoui double %a0 to i32
%ff = uitofp i32 %ii to double
ret double %ff
@@ -186,15 +195,24 @@ define double @ucvtf64_i64(double %a0) {
define float @ucvtf32_i32(float %a0) {
; SSE-LABEL: ucvtf32_i32:
; SSE: # %bb.0:
-; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movl %eax, %eax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: ucvtf32_i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: ucvtf32_i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttss2si %xmm0, %rax
+; AVX2-NEXT: movl %eax, %eax
+; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ucvtf32_i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
+; AVX512-NEXT: retq
%ii = fptoui float %a0 to i32
%ff = uitofp i32 %ii to float
ret float %ff
@@ -265,5 +283,3 @@ define float @ucvtf32_i64(float %a0) {
%ff = uitofp i64 %ii to float
ret float %ff
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX512: {{.*}}
diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll
index c608c70f813c2..9095fb1550e70 100644
--- a/llvm/test/CodeGen/X86/ftrunc.ll
+++ b/llvm/test/CodeGen/X86/ftrunc.ll
@@ -10,8 +10,10 @@ declare i64 @llvm.fptosi.sat.i64.f64(double)
define float @trunc_unsigned_f32(float %x) #0 {
; SSE2-LABEL: trunc_unsigned_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: movl %eax, %eax
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_unsigned_f32:
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
index 30262f93501e6..94128e34c52cd 100644
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -207,8 +207,9 @@ define i32 @isuint_return(double %d) nounwind {
;
; X86-LABEL: isuint_return:
; X86: # %bb.0:
-; X86-NEXT: cvttpd2dq %xmm0, %xmm1
-; X86-NEXT: cvtdq2pd %xmm1, %xmm1
+; X86-NEXT: cvttsd2si %xmm0, %rax
+; X86-NEXT: movl %eax, %eax
+; X86-NEXT: cvtsi2sd %rax, %xmm1
; X86-NEXT: cmpeqsd %xmm0, %xmm1
; X86-NEXT: movq %xmm1, %rax
; X86-NEXT: andl $1, %eax
@@ -218,8 +219,17 @@ define i32 @isuint_return(double %d) nounwind {
; SSE2-LABEL: isuint_return:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: cvttpd2dq %xmm0, %xmm1
-; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE2-NEXT: cvttsd2si %xmm0, %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: sarl $31, %ecx
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE2-NEXT: cvttsd2si %xmm1, %edx
+; SSE2-NEXT: andl %ecx, %edx
+; SSE2-NEXT: orl %eax, %edx
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; SSE2-NEXT: cmpeqsd %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $1, %eax
@@ -227,8 +237,8 @@ define i32 @isuint_return(double %d) nounwind {
;
; AVX512VL-LABEL: isuint_return:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1
-; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512VL-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/setoeq.ll b/llvm/test/CodeGen/X86/setoeq.ll
index 10e2ace08a86a..131e279aa645c 100644
--- a/llvm/test/CodeGen/X86/setoeq.ll
+++ b/llvm/test/CodeGen/X86/setoeq.ll
@@ -47,8 +47,17 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone {
; SSE-LABEL: oeq_f64_u32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
-; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE-NEXT: cvttsd2si %xmm0, %eax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: sarl $31, %ecx
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE-NEXT: cvttsd2si %xmm1, %edx
+; SSE-NEXT: andl %ecx, %edx
+; SSE-NEXT: orl %eax, %edx
+; SSE-NEXT: movd %edx, %xmm1
+; SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; SSE-NEXT: cmpeqsd %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: andl $1, %eax
@@ -58,8 +67,16 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone {
; AVX-LABEL: oeq_f64_u32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vcvttpd2dq %xmm0, %xmm1
-; AVX-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX-NEXT: vcvttsd2si %xmm0, %eax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: sarl $31, %ecx
+; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
+; AVX-NEXT: vcvttsd2si %xmm1, %edx
+; AVX-NEXT: andl %ecx, %edx
+; AVX-NEXT: orl %eax, %edx
+; AVX-NEXT: vmovd %edx, %xmm1
+; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; AVX-NEXT: vcmpeqsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: andl $1, %eax
@@ -69,8 +86,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone {
; AVX512-LABEL: oeq_f64_u32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm1
-; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1
; AVX512-NEXT: vcmpeqsd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
@@ -294,8 +311,17 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone {
; SSE-LABEL: une_f64_u32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
-; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE-NEXT: cvttsd2si %xmm0, %eax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: sarl $31, %ecx
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE-NEXT: cvttsd2si %xmm1, %edx
+; SSE-NEXT: andl %ecx, %edx
+; SSE-NEXT: orl %eax, %edx
+; SSE-NEXT: movd %edx, %xmm1
+; SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; SSE-NEXT: cmpneqsd %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: andl $1, %eax
@@ -305,8 +331,16 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone {
; AVX-LABEL: une_f64_u32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vcvttpd2dq %xmm0, %xmm1
-; AVX-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX-NEXT: vcvttsd2si %xmm0, %eax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: sarl $31, %ecx
+; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
+; AVX-NEXT: vcvttsd2si %xmm1, %edx
+; AVX-NEXT: andl %ecx, %edx
+; AVX-NEXT: orl %eax, %edx
+; AVX-NEXT: vmovd %edx, %xmm1
+; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; AVX-NEXT: vcmpneqsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: andl $1, %eax
@@ -316,8 +350,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone {
; AVX512-LABEL: une_f64_u32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm1
-; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1
; AVX512-NEXT: vcmpneqsd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
>From 819fcd818f1df6777be248d3facf07847614e56c Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Tue, 28 Oct 2025 19:05:52 -0400
Subject: [PATCH 11/11] [X86] Added optimizations for fp to signed & unsigned
i32 conversions.
Extends lowerFPToIntToFP to support i32 conversions on both VLX+DQ and
DQ-only targets, and updates the appropriate test cases.
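A small IR example of the newly covered case (function name illustrative): per the updated ucvtf64_i32 checks, on DQ targets without VLX the u32 round-trip below now widens to the 512-bit vcvttpd2udq/vcvtudq2pd pair instead of going through scalar vcvttsd2usi/vcvtusi2sd:

define double @u32_roundtrip_f64(double %a0) {
  %i = fptoui double %a0 to i32
  %f = uitofp i32 %i to double
  ret double %f
}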
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 23 ++++-----
llvm/test/CodeGen/X86/fp-int-fp-cvt.ll | 65 +++++++++++++++++++------
llvm/test/CodeGen/X86/isint.ll | 4 +-
llvm/test/CodeGen/X86/setoeq.ll | 8 +--
4 files changed, 68 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6091c0ecf9e2f..fbca1b23cba42 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19933,7 +19933,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
// See if we have 128-bit vector cast instructions for this type of cast.
// We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
- !(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI())))
+ (IntVT != MVT::i32 && IntVT != MVT::i64))
return SDValue();
unsigned SrcSize = SrcVT.getSizeInBits();
@@ -19943,7 +19943,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
unsigned Width = 128;
bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
- if (Subtarget.hasVLX() && IntVT == MVT::i64) {
+ if (Subtarget.hasVLX() && Subtarget.hasDQI()) {
// AVX512DQ+VLX
if (IsUnsigned) {
ToIntOpcode =
@@ -19956,14 +19956,14 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
ToFPOpcode =
IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
}
- } else if (IntVT == MVT::i64) {
- // Need to extend width for AVX512DQ without AVX512VL
+ } else if (Subtarget.hasDQI()) {
+ // Need to extend width for AVX512DQ without AVX512VL.
Width = 512;
ToIntOpcode = CastToInt.getOpcode();
ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
} else {
- // SSE2
- if (IsUnsigned)
+ // SSE2 can only perform f64/f32 <-> i32 signed.
+ if (IsUnsigned || IntVT == MVT::i64)
return SDValue();
ToIntOpcode =
SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
@@ -19972,18 +19972,19 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
}
MVT VecSrcVT, VecIntVT, VecVT;
- unsigned NumElts = Width / IntSize;
- VecIntVT = MVT::getVectorVT(IntVT, NumElts);
+ unsigned NumElts;
unsigned SrcElts, VTElts;
- // vcvttps2qq cannot convert v16f32 <-> v8i64
- if (IntVT == MVT::i64 && Width == 512) {
+ // Some conversions are only legal with uniform vector sizes on AVX512DQ.
+ if (Width == 512) {
+ NumElts = std::min(Width / IntSize, Width / SrcSize);
SrcElts = NumElts;
VTElts = NumElts;
} else {
+ NumElts = Width / IntSize;
SrcElts = Width / SrcSize;
VTElts = Width / VTSize;
}
-
+ VecIntVT = MVT::getVectorVT(IntVT, NumElts);
VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
VecVT = MVT::getVectorVT(VT, VTElts);
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
index ecddb2ad160be..9f82140399bef 100644
--- a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
+++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
@@ -16,11 +16,26 @@ define double @scvtf64_i32(double %a0) {
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: scvtf64_i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: scvtf64_i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-VL-LABEL: scvtf64_i32:
+; AVX512-VL: # %bb.0:
+; AVX512-VL-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512-VL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX512-VL-NEXT: retq
+;
+; AVX512-NOVL-LABEL: scvtf64_i32:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
+; AVX512-NOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NOVL-NEXT: vzeroupper
+; AVX512-NOVL-NEXT: retq
%ii = fptosi double %a0 to i32
%ff = sitofp i32 %ii to double
ret double %ff
@@ -129,11 +144,20 @@ define double @ucvtf64_i32(double %a0) {
; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: ucvtf64_i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
-; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0
-; AVX512-NEXT: retq
+; AVX512-VL-LABEL: ucvtf64_i32:
+; AVX512-VL: # %bb.0:
+; AVX512-VL-NEXT: vcvttpd2udq %xmm0, %xmm0
+; AVX512-VL-NEXT: vcvtudq2pd %xmm0, %xmm0
+; AVX512-VL-NEXT: retq
+;
+; AVX512-NOVL-LABEL: ucvtf64_i32:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NOVL-NEXT: vcvttpd2udq %zmm0, %ymm0
+; AVX512-NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NOVL-NEXT: vzeroupper
+; AVX512-NOVL-NEXT: retq
%ii = fptoui double %a0 to i32
%ff = uitofp i32 %ii to double
ret double %ff
@@ -208,11 +232,20 @@ define float @ucvtf32_i32(float %a0) {
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: ucvtf32_i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vcvttss2usi %xmm0, %eax
-; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
-; AVX512-NEXT: retq
+; AVX512-VL-LABEL: ucvtf32_i32:
+; AVX512-VL: # %bb.0:
+; AVX512-VL-NEXT: vcvttps2udq %xmm0, %xmm0
+; AVX512-VL-NEXT: vcvtudq2ps %xmm0, %xmm0
+; AVX512-VL-NEXT: retq
+;
+; AVX512-NOVL-LABEL: ucvtf32_i32:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NOVL-NEXT: vcvttps2udq %zmm0, %zmm0
+; AVX512-NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NOVL-NEXT: vzeroupper
+; AVX512-NOVL-NEXT: retq
%ii = fptoui float %a0 to i32
%ff = uitofp i32 %ii to float
ret float %ff
@@ -283,3 +316,5 @@ define float @ucvtf32_i64(float %a0) {
%ff = uitofp i64 %ii to float
ret float %ff
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX512: {{.*}}
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
index 94128e34c52cd..3b6f95d708558 100644
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -237,8 +237,8 @@ define i32 @isuint_return(double %d) nounwind {
;
; AVX512VL-LABEL: isuint_return:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax
-; AVX512VL-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtudq2pd %xmm1, %xmm1
; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/setoeq.ll b/llvm/test/CodeGen/X86/setoeq.ll
index 131e279aa645c..87317879abbc8 100644
--- a/llvm/test/CodeGen/X86/setoeq.ll
+++ b/llvm/test/CodeGen/X86/setoeq.ll
@@ -86,8 +86,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone {
; AVX512-LABEL: oeq_f64_u32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
-; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1
+; AVX512-NEXT: vcvttpd2udq %xmm0, %xmm1
+; AVX512-NEXT: vcvtudq2pd %xmm1, %xmm1
; AVX512-NEXT: vcmpeqsd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
@@ -350,8 +350,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone {
; AVX512-LABEL: une_f64_u32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
-; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1
+; AVX512-NEXT: vcvttpd2udq %xmm0, %xmm1
+; AVX512-NEXT: vcvtudq2pd %xmm1, %xmm1
; AVX512-NEXT: vcmpneqsd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax