[llvm] goldsteinn/x86 nneg uitofp (PR #86694)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 10:00:40 PDT 2024
https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/86694
- **[X86] Add tests for `uitofp nneg` -> `sitofp`; NFC**
- **[X86] Use `nneg` flag when trying to convert `uitofp` -> `sitofp`**
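For readers skimming the patch: the `nneg` flag on `uitofp` asserts that the source integer is non-negative (the result is poison otherwise), so the backend is free to pick the signed-conversion lowering. A minimal IR sketch of the idea (illustrative only, not taken from the patch; the function name is made up):

```llvm
; With nneg, the sign bit is known to be clear, so the unsigned and signed
; interpretations of %x agree and the cheaper signed conversion can be used
; (a single cvtsi2ss on x86-64, per the test updates below).
define float @nneg_example(i32 %x) {
  %r = uitofp nneg i32 %x to float
  ret float %r
}
```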
>From 42f32f325d1ca3f70e1c478bcf01dcdc64eb46ee Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Mar 2024 12:29:30 -0500
Subject: [PATCH 1/2] [X86] Add tests for `uitofp nneg` -> `sitofp`; NFC
---
llvm/test/CodeGen/X86/uint_to_fp.ll | 70 +++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/llvm/test/CodeGen/X86/uint_to_fp.ll b/llvm/test/CodeGen/X86/uint_to_fp.ll
index d8e0b61ed199fa..8b9dfedb8da02f 100644
--- a/llvm/test/CodeGen/X86/uint_to_fp.ll
+++ b/llvm/test/CodeGen/X86/uint_to_fp.ll
@@ -25,3 +25,73 @@ entry:
store float %1, ptr %y
ret void
}
+
+define float @test_without_nneg(i32 %x) nounwind {
+; X86-LABEL: test_without_nneg:
+; X86: ## %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: cvtsd2ss %xmm0, %xmm0
+; X86-NEXT: movss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_without_nneg:
+; X64: ## %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: retq
+ %r = uitofp i32 %x to float
+ ret float %r
+}
+
+define float @test_with_nneg(i32 %x) nounwind {
+; X86-LABEL: test_with_nneg:
+; X86: ## %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: cvtsd2ss %xmm0, %xmm0
+; X86-NEXT: movss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_with_nneg:
+; X64: ## %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: retq
+ %r = uitofp nneg i32 %x to float
+ ret float %r
+}
+
+define <4 x float> @test_with_nneg_vec(<4 x i32> %x) nounwind {
+; X86-LABEL: test_with_nneg_vec:
+; X86: ## %bb.0:
+; X86-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; X86-NEXT: pand %xmm0, %xmm1
+; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: psrld $16, %xmm0
+; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: addps %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_with_nneg_vec:
+; X64: ## %bb.0:
+; X64-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; X64-NEXT: pand %xmm0, %xmm1
+; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-NEXT: psrld $16, %xmm0
+; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: addps %xmm1, %xmm0
+; X64-NEXT: retq
+ %r = uitofp nneg <4 x i32> %x to <4 x float>
+ ret <4 x float> %r
+}
>From daeeac732e2b7e4b1d9d5e58ffc2c310faccc30a Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Mar 2024 11:42:02 -0500
Subject: [PATCH 2/2] [X86] Use `nneg` flag when trying to convert `uitofp` ->
`sitofp`
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +++--
llvm/test/CodeGen/X86/uint_to_fp.ll | 24 ++++--------------------
2 files changed, 7 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1ce742a7c19a74..c5b391df20c837 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48270,7 +48270,7 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
// We do not split for SSE at all, but we need to split vectors for AVX1 and
// AVX2.
- if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
+ if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) {
SDValue LoX, HiX;
std::tie(LoX, HiX) = splitVector(X, DAG, DL);
@@ -54065,7 +54065,8 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
- if (DAG.SignBitIsZero(Op0)) {
+ SDNodeFlags Flags = N->getFlags();
+ if (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0)) {
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other},
{N->getOperand(0), Op0});
diff --git a/llvm/test/CodeGen/X86/uint_to_fp.ll b/llvm/test/CodeGen/X86/uint_to_fp.ll
index 8b9dfedb8da02f..8c8cbb151974d6 100644
--- a/llvm/test/CodeGen/X86/uint_to_fp.ll
+++ b/llvm/test/CodeGen/X86/uint_to_fp.ll
@@ -52,10 +52,7 @@ define float @test_with_nneg(i32 %x) nounwind {
; X86-LABEL: test_with_nneg:
; X86: ## %bb.0:
; X86-NEXT: pushl %eax
-; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: cvtsd2ss %xmm0, %xmm0
+; X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
@@ -63,8 +60,7 @@ define float @test_with_nneg(i32 %x) nounwind {
;
; X64-LABEL: test_with_nneg:
; X64: ## %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: cvtsi2ss %edi, %xmm0
; X64-NEXT: retq
%r = uitofp nneg i32 %x to float
ret float %r
@@ -73,24 +69,12 @@ define float @test_with_nneg(i32 %x) nounwind {
define <4 x float> @test_with_nneg_vec(<4 x i32> %x) nounwind {
; X86-LABEL: test_with_nneg_vec:
; X86: ## %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; X86-NEXT: pand %xmm0, %xmm1
-; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-NEXT: psrld $16, %xmm0
-; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: addps %xmm1, %xmm0
+; X86-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test_with_nneg_vec:
; X64: ## %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; X64-NEXT: pand %xmm0, %xmm1
-; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-NEXT: psrld $16, %xmm0
-; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: addps %xmm1, %xmm0
+; X64-NEXT: cvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
%r = uitofp nneg <4 x i32> %x to <4 x float>
ret <4 x float> %r
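As a closing note (commentary, not part of the patch): the vector case benefits the most, since SSE has no unsigned int-to-float instruction and the generic lowering expands to the constant-pool bit tricks visible in the removed CHECK lines, while the signed path is a single cvtdq2ps. A small IR sketch contrasting the two forms (function names are illustrative):

```llvm
; Without nneg, SSE must emulate the unsigned conversion (pand/por/psrld/
; subps/addps against constant-pool values, as in the pre-patch
; test_with_nneg_vec checks).
define <4 x float> @vec_unsigned(<4 x i32> %x) {
  %r = uitofp <4 x i32> %x to <4 x float>
  ret <4 x float> %r
}

; With nneg, combineUIntToFP can treat the node as SINT_TO_FP, which selects
; a single cvtdq2ps (matching the updated test_with_nneg_vec expectations).
define <4 x float> @vec_nneg(<4 x i32> %x) {
  %r = uitofp nneg <4 x i32> %x to <4 x float>
  ret <4 x float> %r
}
```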