[llvm] bf60a5a - [X86] Convert unsigned int 0 to floating-point with FILD instruction.
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 13 05:06:36 PDT 2022
Author: Liu, Chen3
Date: 2022-04-13T20:06:15+08:00
New Revision: bf60a5af0a21323f257719a08d57b28a3389b283
URL: https://github.com/llvm/llvm-project/commit/bf60a5af0a21323f257719a08d57b28a3389b283
DIFF: https://github.com/llvm/llvm-project/commit/bf60a5af0a21323f257719a08d57b28a3389b283.diff
LOG: [X86] Convert unsigned int 0 to floating-point with FILD instruction.
With the SSE lowering, unsigned int 0 is converted to float/double -0.0
when the rounding mode is set to 'FE_DOWNWARD'. Use the FILD instruction
instead of SSE instructions on 32-bit targets if strictfp is enabled.
Differential Revision: https://reviews.llvm.org/D123660
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/fp-intrinsics.ll
llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1b3c5b1ab57c..1a28c3c5ddcdf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21238,7 +21238,10 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && Subtarget.hasSSE2() &&
!IsStrict)
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
- if (SrcVT == MVT::i32 && Subtarget.hasSSE2() && DstVT != MVT::f80)
+ // The transform for i32->f64/f32 isn't correct for 0 when rounding to
+ // negative infinity. So disable under strictfp. Using FILD instead.
+ if (SrcVT == MVT::i32 && Subtarget.hasSSE2() && DstVT != MVT::f80 &&
+ !IsStrict)
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 &&
(DstVT == MVT::f32 || DstVT == MVT::f64))
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index af9a640fd1cd7..378a32ea61f7c 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -2434,15 +2434,16 @@ define double @uifdi(i32 %x) #0 {
;
; X86-SSE-LABEL: uifdi:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: subl $12, %esp
-; X86-SSE-NEXT: .cfi_def_cfa_offset 16
-; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movsd %xmm0, (%esp)
-; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: subl $20, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 24
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movl %eax, (%esp)
+; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fildll (%esp)
+; X86-SSE-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: wait
-; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
@@ -2655,16 +2656,16 @@ define float @uiffi(i32 %x) #0 {
;
; X86-SSE-LABEL: uiffi:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: pushl %eax
-; X86-SSE-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: cvtsd2ss %xmm0, %xmm0
-; X86-SSE-NEXT: movss %xmm0, (%esp)
-; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: subl $20, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 24
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-SSE-NEXT: wait
-; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
index 51032e1299f71..738fec62e5f5c 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
@@ -485,17 +485,23 @@ define float @uitofp_i16tof32(i16 %x) #0 {
define float @uitofp_i32tof32(i32 %x) #0 {
; SSE-X86-LABEL: uitofp_i32tof32:
; SSE-X86: # %bb.0:
-; SSE-X86-NEXT: pushl %eax
+; SSE-X86-NEXT: pushl %ebp
; SSE-X86-NEXT: .cfi_def_cfa_offset 8
-; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; SSE-X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; SSE-X86-NEXT: cvtsd2ss %xmm0, %xmm0
-; SSE-X86-NEXT: movss %xmm0, (%esp)
-; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: .cfi_offset %ebp, -8
+; SSE-X86-NEXT: movl %esp, %ebp
+; SSE-X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE-X86-NEXT: andl $-8, %esp
+; SSE-X86-NEXT: subl $16, %esp
+; SSE-X86-NEXT: movl 8(%ebp), %eax
+; SSE-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: flds {{[0-9]+}}(%esp)
; SSE-X86-NEXT: wait
-; SSE-X86-NEXT: popl %eax
-; SSE-X86-NEXT: .cfi_def_cfa_offset 4
+; SSE-X86-NEXT: movl %ebp, %esp
+; SSE-X86-NEXT: popl %ebp
+; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: uitofp_i32tof32:
@@ -506,17 +512,23 @@ define float @uitofp_i32tof32(i32 %x) #0 {
;
; AVX1-X86-LABEL: uitofp_i32tof32:
; AVX1-X86: # %bb.0:
-; AVX1-X86-NEXT: pushl %eax
+; AVX1-X86-NEXT: pushl %ebp
; AVX1-X86-NEXT: .cfi_def_cfa_offset 8
-; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-X86-NEXT: vorpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; AVX1-X86-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; AVX1-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
-; AVX1-X86-NEXT: vmovss %xmm0, (%esp)
-; AVX1-X86-NEXT: flds (%esp)
+; AVX1-X86-NEXT: .cfi_offset %ebp, -8
+; AVX1-X86-NEXT: movl %esp, %ebp
+; AVX1-X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX1-X86-NEXT: andl $-8, %esp
+; AVX1-X86-NEXT: subl $16, %esp
+; AVX1-X86-NEXT: movl 8(%ebp), %eax
+; AVX1-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; AVX1-X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; AVX1-X86-NEXT: fildll {{[0-9]+}}(%esp)
+; AVX1-X86-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX1-X86-NEXT: flds {{[0-9]+}}(%esp)
; AVX1-X86-NEXT: wait
-; AVX1-X86-NEXT: popl %eax
-; AVX1-X86-NEXT: .cfi_def_cfa_offset 4
+; AVX1-X86-NEXT: movl %ebp, %esp
+; AVX1-X86-NEXT: popl %ebp
+; AVX1-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX1-X86-NEXT: retl
;
; AVX1-X64-LABEL: uitofp_i32tof32:
@@ -1162,12 +1174,13 @@ define double @uitofp_i32tof64(i32 %x) #0 {
; SSE-X86-NEXT: movl %esp, %ebp
; SSE-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE-X86-NEXT: andl $-8, %esp
-; SSE-X86-NEXT: subl $8, %esp
-; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; SSE-X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; SSE-X86-NEXT: movsd %xmm0, (%esp)
-; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: subl $16, %esp
+; SSE-X86-NEXT: movl 8(%ebp), %eax
+; SSE-X86-NEXT: movl %eax, (%esp)
+; SSE-X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: fildll (%esp)
+; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: fldl {{[0-9]+}}(%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
@@ -1188,12 +1201,13 @@ define double @uitofp_i32tof64(i32 %x) #0 {
; AVX1-X86-NEXT: movl %esp, %ebp
; AVX1-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX1-X86-NEXT: andl $-8, %esp
-; AVX1-X86-NEXT: subl $8, %esp
-; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-X86-NEXT: vorpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; AVX1-X86-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; AVX1-X86-NEXT: vmovsd %xmm0, (%esp)
-; AVX1-X86-NEXT: fldl (%esp)
+; AVX1-X86-NEXT: subl $16, %esp
+; AVX1-X86-NEXT: movl 8(%ebp), %eax
+; AVX1-X86-NEXT: movl %eax, (%esp)
+; AVX1-X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; AVX1-X86-NEXT: fildll (%esp)
+; AVX1-X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; AVX1-X86-NEXT: fldl {{[0-9]+}}(%esp)
; AVX1-X86-NEXT: wait
; AVX1-X86-NEXT: movl %ebp, %esp
; AVX1-X86-NEXT: popl %ebp
More information about the llvm-commits
mailing list