[llvm] goldsteinn/simplify int to fp (PR #85445)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 15 11:39:16 PDT 2024
https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/85445
- **[DAGCombiner] Add tests for simplifying `{si|ui}tofp`; NFC**
- **[DAGCombiner] Simplifying `{si|ui}tofp` when only signbit is needed**
>From 0e4359fe7250c2d15669037f897ace0624f2b95c Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 15 Mar 2024 12:44:15 -0500
Subject: [PATCH 1/2] [DAGCombiner] Add tests for simplifying `{si|ui}tofp`;
NFC
---
llvm/test/CodeGen/X86/int-to-fp-demanded.ll | 422 ++++++++++++++++++++
1 file changed, 422 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/int-to-fp-demanded.ll
diff --git a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
new file mode 100644
index 00000000000000..2739949c4cb6dd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
@@ -0,0 +1,422 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+declare void @use.i1(i1)
+declare void @use.i32(i32)
+define i32 @sitofp_signbit_only(i32 %i_in) {
+; X86-LABEL: sitofp_signbit_only:
+; X86: # %bb.0:
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: fildl (%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_signbit_only:
+; X64: # %bb.0:
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: shll $31, %eax
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @sitofp_signbit_only_okay_width(i16 %i_in) {
+; X86-LABEL: sitofp_signbit_only_okay_width:
+; X86: # %bb.0:
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X86-NEXT: filds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_signbit_only_okay_width:
+; X64: # %bb.0:
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: cvtsi2ss %eax, %xmm0
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: shll $31, %eax
+; X64-NEXT: retq
+ %f = sitofp i16 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @sitofp_signbit_only_fail_bad_width1(i64 %i_in) {
+; X86-LABEL: sitofp_signbit_only_fail_bad_width1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl (%esp), %eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_signbit_only_fail_bad_width1:
+; X64: # %bb.0:
+; X64-NEXT: cvtsi2ss %rdi, %xmm0
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: shll $31, %eax
+; X64-NEXT: retq
+ %f = sitofp i64 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define <2 x i16> @sitofp_signbit_only_fail_bad_width2(i32 %i_in) {
+; X86-LABEL: sitofp_signbit_only_fail_bad_width2:
+; X86: # %bb.0:
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: fildl (%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: shrl $16, %edx
+; X86-NEXT: andl $32768, %eax # imm = 0x8000
+; X86-NEXT: andl $32768, %edx # imm = 0x8000
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: # kill: def $dx killed $dx killed $edx
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_signbit_only_fail_bad_width2:
+; X64: # %bb.0:
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i2xi16 = bitcast float %f to <2 x i16>
+ %r = and <2 x i16> %i2xi16, <i16 32768, i16 32768>
+ ret <2 x i16> %r
+}
+
+define i32 @sitofp_many_bits_fail(i32 %i_in) {
+; X86-LABEL: sitofp_many_bits_fail:
+; X86: # %bb.0:
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: fildl (%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-2147483647, %eax # imm = 0x80000001
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_many_bits_fail:
+; X64: # %bb.0:
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $-2147483647, %eax # imm = 0x80000001
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483649
+ ret i32 %r
+}
+
+define i32 @sitofp_multiuse_fail(i32 %i_in) {
+; X86-LABEL: sitofp_multiuse_fail:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: fildl {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll use.i32@PLT
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_multiuse_fail:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: .cfi_offset %rbx, -16
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movd %xmm0, %ebx
+; X64-NEXT: movl %ebx, %edi
+; X64-NEXT: callq use.i32@PLT
+; X64-NEXT: andl $-2147483648, %ebx # imm = 0x80000000
+; X64-NEXT: movl %ebx, %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ call void @use.i32(i32 %i)
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @sitofp_multiuse_okay(i32 %i_in) {
+; X86-LABEL: sitofp_multiuse_okay:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: fildl {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll use.i1@PLT
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_multiuse_okay:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: .cfi_offset %rbx, -16
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movd %xmm0, %ebx
+; X64-NEXT: movl %ebx, %edi
+; X64-NEXT: callq use.i1@PLT
+; X64-NEXT: andl $-2147483648, %ebx # imm = 0x80000000
+; X64-NEXT: movl %ebx, %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %cmp = icmp slt i32 %i, 0
+ call void @use.i1(i32 %i)
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @uitofp_signbit_only(i32 %i_in) {
+; X86-LABEL: uitofp_signbit_only:
+; X86: # %bb.0:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_signbit_only:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @uitofp_signbit_only_okay_width(i16 %i_in) {
+; X86-LABEL: uitofp_signbit_only_okay_width:
+; X86: # %bb.0:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_signbit_only_okay_width:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+ %f = uitofp i16 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @uitofp_signbit_only_okay_width1(i64 %i_in) {
+; X86-LABEL: uitofp_signbit_only_okay_width1:
+; X86: # %bb.0:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_signbit_only_okay_width1:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+ %f = uitofp i64 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define <2 x i16> @uitofp_signbit_only_fail_bad_width2(i32 %i_in) {
+; X86-LABEL: uitofp_signbit_only_fail_bad_width2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl $32768, %eax # imm = 0x8000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_signbit_only_fail_bad_width2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i2xi16 = bitcast float %f to <2 x i16>
+ %r = and <2 x i16> %i2xi16, <i16 32768, i16 32768>
+ ret <2 x i16> %r
+}
+
+define i32 @uitofp_many_bits_fail(i32 %i_in) {
+; X86-LABEL: uitofp_many_bits_fail:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_many_bits_fail:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483649
+ ret i32 %r
+}
+
+define i32 @uitofp_multiuse_fail(i32 %i_in) {
+; X86-LABEL: uitofp_multiuse_fail:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll use.i32@PLT
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_multiuse_fail:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: movd %xmm0, %edi
+; X64-NEXT: callq use.i32@PLT
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ call void @use.i32(i32 %i)
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @uitofp_multiuse_okay(i32 %i_in) {
+; X86-LABEL: uitofp_multiuse_okay:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll use.i1@PLT
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_multiuse_okay:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: movd %xmm0, %edi
+; X64-NEXT: callq use.i1@PLT
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %cmp = icmp slt i32 %i, 0
+ call void @use.i1(i32 %i)
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
>From 78ac07fe49547074ce387f301b5b645ee8fa0b7c Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 15 Mar 2024 12:45:31 -0500
Subject: [PATCH 2/2] [DAGCombiner] Simplifying `{si|ui}tofp` when only signbit
is needed
If only the sign bit is demanded, `uitofp` simplifies to 0, and `sitofp`
simplifies to a `bitcast` of its operand when the scalar widths match.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 28 +++++++++++++++++++
.../CodeGen/X86/combine-sse41-intrinsics.ll | 3 +-
llvm/test/CodeGen/X86/int-to-fp-demanded.ll | 23 +++++----------
3 files changed, 36 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 57f8fc409de453..88a4c8124981ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -816,6 +816,18 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
}
break;
}
+ case ISD::SINT_TO_FP: {
+ EVT InnerVT = Op.getOperand(0).getValueType();
+ if (DemandedBits.isSignMask() &&
+ VT.getScalarSizeInBits() == InnerVT.getScalarSizeInBits())
+ return DAG.getBitcast(VT, Op.getOperand(0));
+ break;
+ }
+ case ISD::UINT_TO_FP: {
+ if (DemandedBits.isSignMask())
+ return DAG.getConstant(0, SDLoc(Op), VT);
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
SDValue Op0 = Op.getOperand(0);
@@ -2313,6 +2325,22 @@ bool TargetLowering::SimplifyDemandedBits(
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
+ case ISD::SINT_TO_FP: {
+ EVT InnerVT = Op.getOperand(0).getValueType();
+ if (DemandedBits.isSignMask() &&
+ VT.getScalarSizeInBits() == InnerVT.getScalarSizeInBits())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Op.getOperand(0)));
+
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
+ case ISD::UINT_TO_FP: {
+ if (DemandedBits.isSignMask())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
index cbb5bd09c2399a..a332b3e8908003 100644
--- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -164,14 +164,13 @@ define <4 x float> @demandedbits_sitofp_blendvps(<4 x float> %a0, <4 x float> %a
; SSE-LABEL: demandedbits_sitofp_blendvps:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm3
-; SSE-NEXT: cvtdq2ps %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: demandedbits_sitofp_blendvps:
; AVX: # %bb.0:
-; AVX-NEXT: vcvtdq2ps %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = sitofp <4 x i32> %a2 to <4 x float>
diff --git a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
index 2739949c4cb6dd..0735d48027196b 100644
--- a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
+++ b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
@@ -7,21 +7,13 @@ declare void @use.i32(i32)
define i32 @sitofp_signbit_only(i32 %i_in) {
; X86-LABEL: sitofp_signbit_only:
; X86: # %bb.0:
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 12
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, (%esp)
-; X86-NEXT: fildl (%esp)
-; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: sitofp_signbit_only:
; X64: # %bb.0:
-; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: movmskps %xmm0, %eax
; X64-NEXT: shll $31, %eax
; X64-NEXT: retq
@@ -48,8 +40,8 @@ define i32 @sitofp_signbit_only_okay_width(i16 %i_in) {
;
; X64-LABEL: sitofp_signbit_only_okay_width:
; X64: # %bb.0:
-; X64-NEXT: movswl %di, %eax
-; X64-NEXT: cvtsi2ss %eax, %xmm0
+; X64-NEXT: shll $16, %edi
+; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: movmskps %xmm0, %eax
; X64-NEXT: shll $31, %eax
; X64-NEXT: retq
@@ -89,15 +81,14 @@ define <2 x i16> @sitofp_signbit_only_fail_bad_width2(i32 %i_in) {
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: fildl (%esp)
; X86-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
; X86-NEXT: shrl $16, %edx
-; X86-NEXT: andl $32768, %eax # imm = 0x8000
; X86-NEXT: andl $32768, %edx # imm = 0x8000
+; X86-NEXT: movl $32768, %eax # imm = 0x8000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: # kill: def $dx killed $dx killed $edx
; X86-NEXT: addl $8, %esp
More information about the llvm-commits
mailing list