[llvm] r370699 - [X86] Don't use Expand for i32 fp_to_uint on SSE1/2 targets on 32-bit target.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 2 22:57:19 PDT 2019


Author: ctopper
Date: Mon Sep  2 22:57:18 2019
New Revision: 370699

URL: http://llvm.org/viewvc/llvm-project?rev=370699&view=rev
Log:
[X86] Don't use Expand for i32 fp_to_uint on SSE1/2 targets on 32-bit target.

Use Custom lowering instead. Fall back to default expansion only
when the scalar FP type belongs in an XMM register. This improves
lowering for fp80 to i32, and also double to i32 on SSE1 only.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=370699&r1=370698&r2=370699&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Sep  2 22:57:18 2019
@@ -287,19 +287,8 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Expand);
     }
   } else if (!Subtarget.useSoftFloat()) {
-    // Since AVX is a superset of SSE3, only check for SSE here.
-    if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
-      // Expand FP_TO_UINT into a select.
-      // FIXME: We would like to use a Custom expander here eventually to do
-      // the optimal thing for SSE vs. the default expansion in the legalizer.
-      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
-    else
-      // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
-      // With SSE3 we can use fisttpll to convert to a signed i64; without
-      // SSE, we're stuck with a fistpll.
-      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
-
-    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
   }
 
   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
@@ -19425,6 +19414,11 @@ SDValue X86TargetLowering::LowerFP_TO_IN
   if (UseSSEReg && IsSigned)
     return Op;
 
+  // Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can use
+  // fisttp.
+  if (!IsSigned && UseSSEReg && !Subtarget.hasSSE3())
+    return SDValue();
+
   // Fall back to X87.
   if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned))
     return V;

Modified: llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll?rev=370699&r1=370698&r2=370699&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar-fp-to-i32.ll Mon Sep  2 22:57:18 2019
@@ -257,74 +257,36 @@ define i32 @d_to_u32(double %a) nounwind
 ;
 ; SSE_32_WIN-LABEL: d_to_u32:
 ; SSE_32_WIN:       # %bb.0:
+; SSE_32_WIN-NEXT:    pushl %ebp
+; SSE_32_WIN-NEXT:    movl %esp, %ebp
+; SSE_32_WIN-NEXT:    andl $-8, %esp
 ; SSE_32_WIN-NEXT:    subl $16, %esp
-; SSE_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    flds __real@4f000000
-; SSE_32_WIN-NEXT:    fld %st(1)
-; SSE_32_WIN-NEXT:    fsub %st(1), %st
+; SSE_32_WIN-NEXT:    fldl 8(%ebp)
 ; SSE_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; SSE_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; SSE_32_WIN-NEXT:    orl $3072, %eax # imm = 0xC00
 ; SSE_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fnstcw (%esp)
-; SSE_32_WIN-NEXT:    movzwl (%esp), %eax
-; SSE_32_WIN-NEXT:    orl $3072, %eax # imm = 0xC00
-; SSE_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fxch %st(1)
-; SSE_32_WIN-NEXT:    fistl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fldcw (%esp)
-; SSE_32_WIN-NEXT:    fxch %st(1)
-; SSE_32_WIN-NEXT:    fucompi %st(1), %st
-; SSE_32_WIN-NEXT:    fstp %st(0)
-; SSE_32_WIN-NEXT:    jbe LBB2_1
-; SSE_32_WIN-NEXT:  # %bb.2:
 ; SSE_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT:    addl $16, %esp
-; SSE_32_WIN-NEXT:    retl
-; SSE_32_WIN-NEXT:  LBB2_1:
-; SSE_32_WIN-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; SSE_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT:    addl $16, %esp
+; SSE_32_WIN-NEXT:    movl %ebp, %esp
+; SSE_32_WIN-NEXT:    popl %ebp
 ; SSE_32_WIN-NEXT:    retl
 ;
 ; SSE_32_LIN-LABEL: d_to_u32:
 ; SSE_32_LIN:       # %bb.0:
-; SSE_32_LIN-NEXT:    subl $16, %esp
+; SSE_32_LIN-NEXT:    subl $20, %esp
 ; SSE_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    flds {{\.LCPI.*}}
-; SSE_32_LIN-NEXT:    fld %st(1)
-; SSE_32_LIN-NEXT:    fsub %st(1), %st
 ; SSE_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; SSE_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; SSE_32_LIN-NEXT:    orl $3072, %eax # imm = 0xC00
 ; SSE_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fnstcw (%esp)
-; SSE_32_LIN-NEXT:    movzwl (%esp), %eax
-; SSE_32_LIN-NEXT:    orl $3072, %eax # imm = 0xC00
-; SSE_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fxch %st(1)
-; SSE_32_LIN-NEXT:    fistl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fldcw (%esp)
-; SSE_32_LIN-NEXT:    fxch %st(1)
-; SSE_32_LIN-NEXT:    fucompi %st(1), %st
-; SSE_32_LIN-NEXT:    fstp %st(0)
-; SSE_32_LIN-NEXT:    jbe .LBB2_1
-; SSE_32_LIN-NEXT:  # %bb.2:
 ; SSE_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT:    addl $16, %esp
-; SSE_32_LIN-NEXT:    retl
-; SSE_32_LIN-NEXT:  .LBB2_1:
-; SSE_32_LIN-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; SSE_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT:    addl $16, %esp
+; SSE_32_LIN-NEXT:    addl $20, %esp
 ; SSE_32_LIN-NEXT:    retl
 ;
 ; X87_WIN-LABEL: d_to_u32:
@@ -507,52 +469,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwi
 ;
 ; SSE2_32_WIN-LABEL: x_to_u32:
 ; SSE2_32_WIN:       # %bb.0:
-; SSE2_32_WIN-NEXT:    subl $8, %esp
-; SSE2_32_WIN-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT:    flds __real@4f000000
-; SSE2_32_WIN-NEXT:    fld %st(1)
-; SSE2_32_WIN-NEXT:    fsub %st(1), %st
-; SSE2_32_WIN-NEXT:    xorl %eax, %eax
-; SSE2_32_WIN-NEXT:    fxch %st(1)
-; SSE2_32_WIN-NEXT:    fucompi %st(2), %st
-; SSE2_32_WIN-NEXT:    fcmovnbe %st(1), %st
-; SSE2_32_WIN-NEXT:    fstp %st(1)
-; SSE2_32_WIN-NEXT:    setbe %al
-; SSE2_32_WIN-NEXT:    fnstcw (%esp)
-; SSE2_32_WIN-NEXT:    movzwl (%esp), %ecx
-; SSE2_32_WIN-NEXT:    orl $3072, %ecx # imm = 0xC00
-; SSE2_32_WIN-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    pushl %ebp
+; SSE2_32_WIN-NEXT:    movl %esp, %ebp
+; SSE2_32_WIN-NEXT:    andl $-8, %esp
+; SSE2_32_WIN-NEXT:    subl $16, %esp
+; SSE2_32_WIN-NEXT:    fldt 8(%ebp)
+; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; SSE2_32_WIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; SSE2_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT:    fldcw (%esp)
-; SSE2_32_WIN-NEXT:    shll $31, %eax
-; SSE2_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE2_32_WIN-NEXT:    addl $8, %esp
+; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2_32_WIN-NEXT:    movl %ebp, %esp
+; SSE2_32_WIN-NEXT:    popl %ebp
 ; SSE2_32_WIN-NEXT:    retl
 ;
 ; SSE2_32_LIN-LABEL: x_to_u32:
 ; SSE2_32_LIN:       # %bb.0:
-; SSE2_32_LIN-NEXT:    subl $8, %esp
+; SSE2_32_LIN-NEXT:    subl $20, %esp
 ; SSE2_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT:    flds {{\.LCPI.*}}
-; SSE2_32_LIN-NEXT:    fld %st(1)
-; SSE2_32_LIN-NEXT:    fsub %st(1), %st
-; SSE2_32_LIN-NEXT:    xorl %eax, %eax
-; SSE2_32_LIN-NEXT:    fxch %st(1)
-; SSE2_32_LIN-NEXT:    fucompi %st(2), %st
-; SSE2_32_LIN-NEXT:    fcmovnbe %st(1), %st
-; SSE2_32_LIN-NEXT:    fstp %st(1)
-; SSE2_32_LIN-NEXT:    setbe %al
-; SSE2_32_LIN-NEXT:    fnstcw (%esp)
-; SSE2_32_LIN-NEXT:    movzwl (%esp), %ecx
-; SSE2_32_LIN-NEXT:    orl $3072, %ecx # imm = 0xC00
-; SSE2_32_LIN-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; SSE2_32_LIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; SSE2_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT:    fldcw (%esp)
-; SSE2_32_LIN-NEXT:    shll $31, %eax
-; SSE2_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE2_32_LIN-NEXT:    addl $8, %esp
+; SSE2_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2_32_LIN-NEXT:    addl $20, %esp
 ; SSE2_32_LIN-NEXT:    retl
 ;
 ; SSE2_64_WIN-LABEL: x_to_u32:
@@ -585,52 +531,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwi
 ;
 ; SSE_32_WIN-LABEL: x_to_u32:
 ; SSE_32_WIN:       # %bb.0:
-; SSE_32_WIN-NEXT:    subl $8, %esp
-; SSE_32_WIN-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    flds __real@4f000000
-; SSE_32_WIN-NEXT:    fld %st(1)
-; SSE_32_WIN-NEXT:    fsub %st(1), %st
-; SSE_32_WIN-NEXT:    xorl %eax, %eax
-; SSE_32_WIN-NEXT:    fxch %st(1)
-; SSE_32_WIN-NEXT:    fucompi %st(2), %st
-; SSE_32_WIN-NEXT:    fcmovnbe %st(1), %st
-; SSE_32_WIN-NEXT:    fstp %st(1)
-; SSE_32_WIN-NEXT:    setbe %al
-; SSE_32_WIN-NEXT:    fnstcw (%esp)
-; SSE_32_WIN-NEXT:    movzwl (%esp), %ecx
-; SSE_32_WIN-NEXT:    orl $3072, %ecx # imm = 0xC00
-; SSE_32_WIN-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    pushl %ebp
+; SSE_32_WIN-NEXT:    movl %esp, %ebp
+; SSE_32_WIN-NEXT:    andl $-8, %esp
+; SSE_32_WIN-NEXT:    subl $16, %esp
+; SSE_32_WIN-NEXT:    fldt 8(%ebp)
+; SSE_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; SSE_32_WIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; SSE_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fldcw (%esp)
-; SSE_32_WIN-NEXT:    shll $31, %eax
-; SSE_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT:    addl $8, %esp
+; SSE_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE_32_WIN-NEXT:    movl %ebp, %esp
+; SSE_32_WIN-NEXT:    popl %ebp
 ; SSE_32_WIN-NEXT:    retl
 ;
 ; SSE_32_LIN-LABEL: x_to_u32:
 ; SSE_32_LIN:       # %bb.0:
-; SSE_32_LIN-NEXT:    subl $8, %esp
+; SSE_32_LIN-NEXT:    subl $20, %esp
 ; SSE_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    flds {{\.LCPI.*}}
-; SSE_32_LIN-NEXT:    fld %st(1)
-; SSE_32_LIN-NEXT:    fsub %st(1), %st
-; SSE_32_LIN-NEXT:    xorl %eax, %eax
-; SSE_32_LIN-NEXT:    fxch %st(1)
-; SSE_32_LIN-NEXT:    fucompi %st(2), %st
-; SSE_32_LIN-NEXT:    fcmovnbe %st(1), %st
-; SSE_32_LIN-NEXT:    fstp %st(1)
-; SSE_32_LIN-NEXT:    setbe %al
-; SSE_32_LIN-NEXT:    fnstcw (%esp)
-; SSE_32_LIN-NEXT:    movzwl (%esp), %ecx
-; SSE_32_LIN-NEXT:    orl $3072, %ecx # imm = 0xC00
-; SSE_32_LIN-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; SSE_32_LIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; SSE_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fldcw (%esp)
-; SSE_32_LIN-NEXT:    shll $31, %eax
-; SSE_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT:    addl $8, %esp
+; SSE_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE_32_LIN-NEXT:    addl $20, %esp
 ; SSE_32_LIN-NEXT:    retl
 ;
 ; X87_WIN-LABEL: x_to_u32:




More information about the llvm-commits mailing list