[llvm] r370699 - [X86] Don't use Expand for i32 fp_to_uint on SSE1/2 targets on 32-bit target.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 2 22:57:19 PDT 2019
Author: ctopper
Date: Mon Sep 2 22:57:18 2019
New Revision: 370699
URL: http://llvm.org/viewvc/llvm-project?rev=370699&view=rev
Log:
[X86] Don't use Expand for i32 fp_to_uint on SSE1/2 targets on 32-bit target.
Use Custom lowering instead. Fall back to default expansion only
when the scalar FP type belongs in an XMM register. This improves
lowering for fp80 to i32, and also double to i32 on SSE1 only.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=370699&r1=370698&r2=370699&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Sep 2 22:57:18 2019
@@ -287,19 +287,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
}
} else if (!Subtarget.useSoftFloat()) {
- // Since AVX is a superset of SSE3, only check for SSE here.
- if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
- // Expand FP_TO_UINT into a select.
- // FIXME: We would like to use a Custom expander here eventually to do
- // the optimal thing for SSE vs. the default expansion in the legalizer.
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
- else
- // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
- // With SSE3 we can use fisttpll to convert to a signed i64; without
- // SSE, we're stuck with a fistpll.
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
-
- setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
@@ -19425,6 +19414,11 @@ SDValue X86TargetLowering::LowerFP_TO_IN
if (UseSSEReg && IsSigned)
return Op;
+ // Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can use
+ // fisttp.
+ if (!IsSigned && UseSSEReg && !Subtarget.hasSSE3())
+ return SDValue();
+
// Fall back to X87.
if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned))
return V;
Modified: llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll?rev=370699&r1=370698&r2=370699&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll Mon Sep 2 22:57:18 2019
@@ -257,74 +257,36 @@ define i32 @d_to_u32(double %a) nounwind
;
; SSE_32_WIN-LABEL: d_to_u32:
; SSE_32_WIN: # %bb.0:
+; SSE_32_WIN-NEXT: pushl %ebp
+; SSE_32_WIN-NEXT: movl %esp, %ebp
+; SSE_32_WIN-NEXT: andl $-8, %esp
; SSE_32_WIN-NEXT: subl $16, %esp
-; SSE_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: flds __real@4f000000
-; SSE_32_WIN-NEXT: fld %st(1)
-; SSE_32_WIN-NEXT: fsub %st(1), %st
+; SSE_32_WIN-NEXT: fldl 8(%ebp)
; SSE_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: fnstcw (%esp)
-; SSE_32_WIN-NEXT: movzwl (%esp), %eax
-; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: fxch %st(1)
-; SSE_32_WIN-NEXT: fistl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: fldcw (%esp)
-; SSE_32_WIN-NEXT: fxch %st(1)
-; SSE_32_WIN-NEXT: fucompi %st(1), %st
-; SSE_32_WIN-NEXT: fstp %st(0)
-; SSE_32_WIN-NEXT: jbe LBB2_1
-; SSE_32_WIN-NEXT: # %bb.2:
; SSE_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT: addl $16, %esp
-; SSE_32_WIN-NEXT: retl
-; SSE_32_WIN-NEXT: LBB2_1:
-; SSE_32_WIN-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; SSE_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT: addl $16, %esp
+; SSE_32_WIN-NEXT: movl %ebp, %esp
+; SSE_32_WIN-NEXT: popl %ebp
; SSE_32_WIN-NEXT: retl
;
; SSE_32_LIN-LABEL: d_to_u32:
; SSE_32_LIN: # %bb.0:
-; SSE_32_LIN-NEXT: subl $16, %esp
+; SSE_32_LIN-NEXT: subl $20, %esp
; SSE_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: flds {{\.LCPI.*}}
-; SSE_32_LIN-NEXT: fld %st(1)
-; SSE_32_LIN-NEXT: fsub %st(1), %st
; SSE_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: fnstcw (%esp)
-; SSE_32_LIN-NEXT: movzwl (%esp), %eax
-; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: fxch %st(1)
-; SSE_32_LIN-NEXT: fistl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: fldcw (%esp)
-; SSE_32_LIN-NEXT: fxch %st(1)
-; SSE_32_LIN-NEXT: fucompi %st(1), %st
-; SSE_32_LIN-NEXT: fstp %st(0)
-; SSE_32_LIN-NEXT: jbe .LBB2_1
-; SSE_32_LIN-NEXT: # %bb.2:
; SSE_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT: addl $16, %esp
-; SSE_32_LIN-NEXT: retl
-; SSE_32_LIN-NEXT: .LBB2_1:
-; SSE_32_LIN-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; SSE_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT: addl $16, %esp
+; SSE_32_LIN-NEXT: addl $20, %esp
; SSE_32_LIN-NEXT: retl
;
; X87_WIN-LABEL: d_to_u32:
@@ -507,52 +469,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwi
;
; SSE2_32_WIN-LABEL: x_to_u32:
; SSE2_32_WIN: # %bb.0:
-; SSE2_32_WIN-NEXT: subl $8, %esp
-; SSE2_32_WIN-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT: flds __real@4f000000
-; SSE2_32_WIN-NEXT: fld %st(1)
-; SSE2_32_WIN-NEXT: fsub %st(1), %st
-; SSE2_32_WIN-NEXT: xorl %eax, %eax
-; SSE2_32_WIN-NEXT: fxch %st(1)
-; SSE2_32_WIN-NEXT: fucompi %st(2), %st
-; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st
-; SSE2_32_WIN-NEXT: fstp %st(1)
-; SSE2_32_WIN-NEXT: setbe %al
-; SSE2_32_WIN-NEXT: fnstcw (%esp)
-; SSE2_32_WIN-NEXT: movzwl (%esp), %ecx
-; SSE2_32_WIN-NEXT: orl $3072, %ecx # imm = 0xC00
-; SSE2_32_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT: pushl %ebp
+; SSE2_32_WIN-NEXT: movl %esp, %ebp
+; SSE2_32_WIN-NEXT: andl $-8, %esp
+; SSE2_32_WIN-NEXT: subl $16, %esp
+; SSE2_32_WIN-NEXT: fldt 8(%ebp)
+; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; SSE2_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT: fldcw (%esp)
-; SSE2_32_WIN-NEXT: shll $31, %eax
-; SSE2_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE2_32_WIN-NEXT: addl $8, %esp
+; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2_32_WIN-NEXT: movl %ebp, %esp
+; SSE2_32_WIN-NEXT: popl %ebp
; SSE2_32_WIN-NEXT: retl
;
; SSE2_32_LIN-LABEL: x_to_u32:
; SSE2_32_LIN: # %bb.0:
-; SSE2_32_LIN-NEXT: subl $8, %esp
+; SSE2_32_LIN-NEXT: subl $20, %esp
; SSE2_32_LIN-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT: flds {{\.LCPI.*}}
-; SSE2_32_LIN-NEXT: fld %st(1)
-; SSE2_32_LIN-NEXT: fsub %st(1), %st
-; SSE2_32_LIN-NEXT: xorl %eax, %eax
-; SSE2_32_LIN-NEXT: fxch %st(1)
-; SSE2_32_LIN-NEXT: fucompi %st(2), %st
-; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st
-; SSE2_32_LIN-NEXT: fstp %st(1)
-; SSE2_32_LIN-NEXT: setbe %al
-; SSE2_32_LIN-NEXT: fnstcw (%esp)
-; SSE2_32_LIN-NEXT: movzwl (%esp), %ecx
-; SSE2_32_LIN-NEXT: orl $3072, %ecx # imm = 0xC00
-; SSE2_32_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; SSE2_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT: fldcw (%esp)
-; SSE2_32_LIN-NEXT: shll $31, %eax
-; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE2_32_LIN-NEXT: addl $8, %esp
+; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2_32_LIN-NEXT: addl $20, %esp
; SSE2_32_LIN-NEXT: retl
;
; SSE2_64_WIN-LABEL: x_to_u32:
@@ -585,52 +531,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwi
;
; SSE_32_WIN-LABEL: x_to_u32:
; SSE_32_WIN: # %bb.0:
-; SSE_32_WIN-NEXT: subl $8, %esp
-; SSE_32_WIN-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: flds __real@4f000000
-; SSE_32_WIN-NEXT: fld %st(1)
-; SSE_32_WIN-NEXT: fsub %st(1), %st
-; SSE_32_WIN-NEXT: xorl %eax, %eax
-; SSE_32_WIN-NEXT: fxch %st(1)
-; SSE_32_WIN-NEXT: fucompi %st(2), %st
-; SSE_32_WIN-NEXT: fcmovnbe %st(1), %st
-; SSE_32_WIN-NEXT: fstp %st(1)
-; SSE_32_WIN-NEXT: setbe %al
-; SSE_32_WIN-NEXT: fnstcw (%esp)
-; SSE_32_WIN-NEXT: movzwl (%esp), %ecx
-; SSE_32_WIN-NEXT: orl $3072, %ecx # imm = 0xC00
-; SSE_32_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT: pushl %ebp
+; SSE_32_WIN-NEXT: movl %esp, %ebp
+; SSE_32_WIN-NEXT: andl $-8, %esp
+; SSE_32_WIN-NEXT: subl $16, %esp
+; SSE_32_WIN-NEXT: fldt 8(%ebp)
+; SSE_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT: fldcw (%esp)
-; SSE_32_WIN-NEXT: shll $31, %eax
-; SSE_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT: addl $8, %esp
+; SSE_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE_32_WIN-NEXT: movl %ebp, %esp
+; SSE_32_WIN-NEXT: popl %ebp
; SSE_32_WIN-NEXT: retl
;
; SSE_32_LIN-LABEL: x_to_u32:
; SSE_32_LIN: # %bb.0:
-; SSE_32_LIN-NEXT: subl $8, %esp
+; SSE_32_LIN-NEXT: subl $20, %esp
; SSE_32_LIN-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: flds {{\.LCPI.*}}
-; SSE_32_LIN-NEXT: fld %st(1)
-; SSE_32_LIN-NEXT: fsub %st(1), %st
-; SSE_32_LIN-NEXT: xorl %eax, %eax
-; SSE_32_LIN-NEXT: fxch %st(1)
-; SSE_32_LIN-NEXT: fucompi %st(2), %st
-; SSE_32_LIN-NEXT: fcmovnbe %st(1), %st
-; SSE_32_LIN-NEXT: fstp %st(1)
-; SSE_32_LIN-NEXT: setbe %al
-; SSE_32_LIN-NEXT: fnstcw (%esp)
-; SSE_32_LIN-NEXT: movzwl (%esp), %ecx
-; SSE_32_LIN-NEXT: orl $3072, %ecx # imm = 0xC00
-; SSE_32_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT: fldcw (%esp)
-; SSE_32_LIN-NEXT: shll $31, %eax
-; SSE_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT: addl $8, %esp
+; SSE_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE_32_LIN-NEXT: addl $20, %esp
; SSE_32_LIN-NEXT: retl
;
; X87_WIN-LABEL: x_to_u32:
More information about the llvm-commits
mailing list