[llvm] [DAGCombiner] Simplifying `{si|ui}tofp` when only signbit is needed (PR #85445)

Sun Mar 17 11:09:35 PDT 2024

https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/85445

>From 37e6b341fc75f23c980fa3c584bda9b6a4d05636 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 15 Mar 2024 12:44:15 -0500
Subject: [PATCH 1/2] [DAGCombiner] Add tests for simplifying `{si|ui}tofp`;
 NFC

---
 llvm/test/CodeGen/X86/int-to-fp-demanded.ll | 382 ++++++++++++++++++++
 1 file changed, 382 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/int-to-fp-demanded.ll

diff --git a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
new file mode 100644
index 00000000000000..cdde03fb0534b9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
@@ -0,0 +1,382 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+declare void @use.i1(i1)
+declare void @use.i32(i32)
+define i32 @sitofp_signbit_only(i32 %i_in) nounwind {
+; X86-LABEL: sitofp_signbit_only:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    fildl (%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_signbit_only:
+; X64:       # %bb.0:
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    movmskps %xmm0, %eax
+; X64-NEXT:    shll $31, %eax
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @sitofp_signbit_only_okay_width(i16 %i_in) nounwind {
+; X86-LABEL: sitofp_signbit_only_okay_width:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X86-NEXT:    filds {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_signbit_only_okay_width:
+; X64:       # %bb.0:
+; X64-NEXT:    movswl %di, %eax
+; X64-NEXT:    cvtsi2ss %eax, %xmm0
+; X64-NEXT:    movmskps %xmm0, %eax
+; X64-NEXT:    shll $31, %eax
+; X64-NEXT:    retq
+  %f = sitofp i16 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @sitofp_signbit_only_fail_bad_width1(i64 %i_in) nounwind {
+; X86-LABEL: sitofp_signbit_only_fail_bad_width1:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl (%esp), %eax
+; X86-NEXT:    popl %ecx
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_signbit_only_fail_bad_width1:
+; X64:       # %bb.0:
+; X64-NEXT:    cvtsi2ss %rdi, %xmm0
+; X64-NEXT:    movmskps %xmm0, %eax
+; X64-NEXT:    shll $31, %eax
+; X64-NEXT:    retq
+  %f = sitofp i64 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define <2 x i16> @sitofp_signbit_only_fail_bad_width2(i32 %i_in) nounwind {
+; X86-LABEL: sitofp_signbit_only_fail_bad_width2:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    fildl (%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shrl $16, %edx
+; X86-NEXT:    andl $32768, %eax # imm = 0x8000
+; X86-NEXT:    andl $32768, %edx # imm = 0x8000
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    # kill: def $dx killed $dx killed $edx
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_signbit_only_fail_bad_width2:
+; X64:       # %bb.0:
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i2xi16 = bitcast float %f to <2 x i16>
+  %r = and <2 x i16> %i2xi16, <i16 32768, i16 32768>
+  ret <2 x i16> %r
+}
+
+define i32 @sitofp_many_bits_fail(i32 %i_in) nounwind {
+; X86-LABEL: sitofp_many_bits_fail:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    fildl (%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-2147483647, %eax # imm = 0x80000001
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_many_bits_fail:
+; X64:       # %bb.0:
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    andl $-2147483647, %eax # imm = 0x80000001
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483649
+  ret i32 %r
+}
+
+define i32 @sitofp_multiuse_fail(i32 %i_in) nounwind {
+; X86-LABEL: sitofp_multiuse_fail:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildl {{[0-9]+}}(%esp)
+; X86-NEXT:    fsts {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll use.i32 at PLT
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_multiuse_fail:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    movd %xmm0, %ebx
+; X64-NEXT:    movl %ebx, %edi
+; X64-NEXT:    callq use.i32 at PLT
+; X64-NEXT:    andl $-2147483648, %ebx # imm = 0x80000000
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  call void @use.i32(i32 %i)
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @sitofp_multiuse_okay(i32 %i_in) nounwind {
+; X86-LABEL: sitofp_multiuse_okay:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildl {{[0-9]+}}(%esp)
+; X86-NEXT:    fsts {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll use.i1 at PLT
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_multiuse_okay:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    movd %xmm0, %ebx
+; X64-NEXT:    movl %ebx, %edi
+; X64-NEXT:    callq use.i1 at PLT
+; X64-NEXT:    andl $-2147483648, %ebx # imm = 0x80000000
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %cmp = icmp slt i32 %i, 0
+  call void @use.i1(i32 %i)
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @uitofp_signbit_only(i32 %i_in) nounwind {
+; X86-LABEL: uitofp_signbit_only:
+; X86:       # %bb.0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_signbit_only:
+; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @uitofp_signbit_only_okay_width(i16 %i_in) nounwind {
+; X86-LABEL: uitofp_signbit_only_okay_width:
+; X86:       # %bb.0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_signbit_only_okay_width:
+; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
+  %f = uitofp i16 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @uitofp_signbit_only_okay_width1(i64 %i_in) nounwind {
+; X86-LABEL: uitofp_signbit_only_okay_width1:
+; X86:       # %bb.0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_signbit_only_okay_width1:
+; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
+  %f = uitofp i64 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define <2 x i16> @uitofp_signbit_only_fail_bad_width2(i32 %i_in) nounwind {
+; X86-LABEL: uitofp_signbit_only_fail_bad_width2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $32768, %eax # imm = 0x8000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_signbit_only_fail_bad_width2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i2xi16 = bitcast float %f to <2 x i16>
+  %r = and <2 x i16> %i2xi16, <i16 32768, i16 32768>
+  ret <2 x i16> %r
+}
+
+define i32 @uitofp_many_bits_fail(i32 %i_in) nounwind {
+; X86-LABEL: uitofp_many_bits_fail:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_many_bits_fail:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483649
+  ret i32 %r
+}
+
+define i32 @uitofp_multiuse_fail(i32 %i_in) nounwind {
+; X86-LABEL: uitofp_multiuse_fail:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll use.i32 at PLT
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_multiuse_fail:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    movd %xmm0, %edi
+; X64-NEXT:    callq use.i32 at PLT
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  call void @use.i32(i32 %i)
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @uitofp_multiuse_okay(i32 %i_in) nounwind {
+; X86-LABEL: uitofp_multiuse_okay:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll use.i1 at PLT
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_multiuse_okay:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    movd %xmm0, %edi
+; X64-NEXT:    callq use.i1 at PLT
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %cmp = icmp slt i32 %i, 0
+  call void @use.i1(i32 %i)
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}

>From b8b3e10bc5ba6b8d62a4f2da61f6efa5a302636f Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 15 Mar 2024 12:45:31 -0500
Subject: [PATCH 2/2] [DAGCombiner] Simplifying `{si|ui}tofp` when only signbit
 is needed

If we only need the signbit `uitofp` simplified to 0, and `sitofp`
simplifies to `bitcast`.
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 30 +++++++++++++++++++
 .../CodeGen/X86/combine-sse41-intrinsics.ll   |  3 +-
 llvm/test/CodeGen/X86/int-to-fp-demanded.ll   | 21 +++++--------
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 57f8fc409de453..16069c6c0dc317 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -611,6 +611,25 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
   return false;
 }
 
+static SDValue simplifyUseOfIntToFP(SDValue Op, const APInt &DemandedBits,
+                                    SelectionDAG &DAG) {
+  unsigned Opc = Op.getOpcode();
+  assert((Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP) &&
+         "Invalid Int -> FP Opcode");
+  if (!DemandedBits.isSignMask())
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  if (Opc == ISD::UINT_TO_FP)
+    return DAG.getConstant(0, SDLoc(Op), VT);
+
+  EVT InnerVT = Op.getOperand(0).getValueType();
+  if (VT.getScalarSizeInBits() == InnerVT.getScalarSizeInBits())
+    return DAG.getBitcast(VT, Op.getOperand(0));
+
+  return SDValue();
+}
+
 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                           DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -816,6 +835,11 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
     }
     break;
   }
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+    if (SDValue R = simplifyUseOfIntToFP(Op, DemandedBits, DAG))
+      return R;
+    break;
   case ISD::SIGN_EXTEND_INREG: {
     // If none of the extended bits are demanded, eliminate the sextinreg.
     SDValue Op0 = Op.getOperand(0);
@@ -2313,6 +2337,12 @@ bool TargetLowering::SimplifyDemandedBits(
     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
     break;
   }
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+    if (SDValue R = simplifyUseOfIntToFP(Op, DemandedBits, TLO.DAG))
+      return TLO.CombineTo(Op, R);
+    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+    break;
   case ISD::SIGN_EXTEND_INREG: {
     SDValue Op0 = Op.getOperand(0);
     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
index cbb5bd09c2399a..a332b3e8908003 100644
--- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -164,14 +164,13 @@ define <4 x float> @demandedbits_sitofp_blendvps(<4 x float> %a0, <4 x float> %a
 ; SSE-LABEL: demandedbits_sitofp_blendvps:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movaps %xmm0, %xmm3
-; SSE-NEXT:    cvtdq2ps %xmm2, %xmm0
+; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3
 ; SSE-NEXT:    movaps %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: demandedbits_sitofp_blendvps:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtdq2ps %xmm2, %xmm2
 ; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %cvt = sitofp <4 x i32> %a2 to <4 x float>
diff --git a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
index cdde03fb0534b9..8652136ae5cd94 100644
--- a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
+++ b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
@@ -7,19 +7,13 @@ declare void @use.i32(i32)
 define i32 @sitofp_signbit_only(i32 %i_in) nounwind {
 ; X86-LABEL: sitofp_signbit_only:
 ; X86:       # %bb.0:
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, (%esp)
-; X86-NEXT:    fildl (%esp)
-; X86-NEXT:    fstps {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sitofp_signbit_only:
 ; X64:       # %bb.0:
-; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    movd %edi, %xmm0
 ; X64-NEXT:    movmskps %xmm0, %eax
 ; X64-NEXT:    shll $31, %eax
 ; X64-NEXT:    retq
@@ -44,8 +38,8 @@ define i32 @sitofp_signbit_only_okay_width(i16 %i_in) nounwind {
 ;
 ; X64-LABEL: sitofp_signbit_only_okay_width:
 ; X64:       # %bb.0:
-; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    cvtsi2ss %eax, %xmm0
+; X64-NEXT:    shll $16, %edi
+; X64-NEXT:    movd %edi, %xmm0
 ; X64-NEXT:    movmskps %xmm0, %eax
 ; X64-NEXT:    shll $31, %eax
 ; X64-NEXT:    retq
@@ -82,15 +76,14 @@ define <2 x i16> @sitofp_signbit_only_fail_bad_width2(i32 %i_in) nounwind {
 ; X86-LABEL: sitofp_signbit_only_fail_bad_width2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, (%esp)
 ; X86-NEXT:    fildl (%esp)
 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shrl $16, %edx
-; X86-NEXT:    andl $32768, %eax # imm = 0x8000
 ; X86-NEXT:    andl $32768, %edx # imm = 0x8000
+; X86-NEXT:    movl $32768, %eax # imm = 0x8000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    # kill: def $dx killed $dx killed $edx
 ; X86-NEXT:    addl $8, %esp