[llvm] [X86][DAG] Use nneg flag when trying to convert uitofp -> sitofp (PR #86694)

Wed Mar 27 12:27:03 PDT 2024

https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/86694

>From 31a4a74d2fdd95860e28e9d4f3271792c66ce243 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 27 Mar 2024 13:07:46 -0500
Subject: [PATCH 1/3] [DAG] Add support for `nneg` flag with `uitofp`

Copy `nneg` flag when building `UINT_TO_FP` from `uitofp` and use
`nneg` flag in the one place we transform `UINT_TO_FP` -> `SINT_TO_FP`
if the operand is non-negative.
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5db32472d197f5..dff1ade1e199eb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3882,7 +3882,11 @@ void SelectionDAGBuilder::visitUIToFP(const User &I) {
   SDValue N = getValue(I.getOperand(0));
   EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                         I.getType());
-  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
+  SDNodeFlags Flags;
+  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(&I))
+    Flags.setNonNeg(PNI->hasNonNeg());
+
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N, Flags));
 }
 
 void SelectionDAGBuilder::visitSIToFP(const User &I) {

>From 2485ad6c6f972582ab88662c0b579bb66a35efe8 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Mar 2024 12:29:30 -0500
Subject: [PATCH 2/3] [X86] Add tests for `uitofp nneg` -> `sitofp`; NFC

---
 llvm/test/CodeGen/X86/uint_to_fp.ll | 70 +++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/llvm/test/CodeGen/X86/uint_to_fp.ll b/llvm/test/CodeGen/X86/uint_to_fp.ll
index d8e0b61ed199fa..8b9dfedb8da02f 100644
--- a/llvm/test/CodeGen/X86/uint_to_fp.ll
+++ b/llvm/test/CodeGen/X86/uint_to_fp.ll
@@ -25,3 +25,73 @@ entry:
   store float %1, ptr %y
   ret void
 }
+
+define float @test_without_nneg(i32 %x) nounwind {
+; X86-LABEL: test_without_nneg:
+; X86:       ## %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT:    cvtsd2ss %xmm0, %xmm0
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_without_nneg:
+; X64:       ## %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    retq
+  %r = uitofp i32 %x to float
+  ret float %r
+}
+
+define float @test_with_nneg(i32 %x) nounwind {
+; X86-LABEL: test_with_nneg:
+; X86:       ## %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT:    cvtsd2ss %xmm0, %xmm0
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_with_nneg:
+; X64:       ## %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    retq
+  %r = uitofp nneg i32 %x to float
+  ret float %r
+}
+
+define <4 x float> @test_with_nneg_vec(<4 x i32> %x) nounwind {
+; X86-LABEL: test_with_nneg_vec:
+; X86:       ## %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; X86-NEXT:    pand %xmm0, %xmm1
+; X86-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT:    psrld $16, %xmm0
+; X86-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT:    addps %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_with_nneg_vec:
+; X64:       ## %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; X64-NEXT:    pand %xmm0, %xmm1
+; X64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-NEXT:    psrld $16, %xmm0
+; X64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    addps %xmm1, %xmm0
+; X64-NEXT:    retq
+  %r = uitofp nneg <4 x i32> %x to <4 x float>
+  ret <4 x float> %r
+}

>From 7c2428592eeec42f7c153b040c26f5c53e5171c0 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Mar 2024 11:42:02 -0500
Subject: [PATCH 3/3] [X86] Use `nneg` flag when trying to convert `uitofp` ->
 `sitofp`

---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  3 ++-
 llvm/test/CodeGen/X86/uint_to_fp.ll     | 24 ++++--------------------
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9d98d31b31df0b..0e6817e68ef2c2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54095,7 +54095,8 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
   // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
   // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
   // the optimization here.
-  if (DAG.SignBitIsZero(Op0)) {
+  SDNodeFlags Flags = N->getFlags();
+  if (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0)) {
     if (IsStrict)
       return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other},
                          {N->getOperand(0), Op0});
diff --git a/llvm/test/CodeGen/X86/uint_to_fp.ll b/llvm/test/CodeGen/X86/uint_to_fp.ll
index 8b9dfedb8da02f..8c8cbb151974d6 100644
--- a/llvm/test/CodeGen/X86/uint_to_fp.ll
+++ b/llvm/test/CodeGen/X86/uint_to_fp.ll
@@ -52,10 +52,7 @@ define float @test_with_nneg(i32 %x) nounwind {
 ; X86-LABEL: test_with_nneg:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT:    orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT:    cvtsd2ss %xmm0, %xmm0
+; X86-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT:    movss %xmm0, (%esp)
 ; X86-NEXT:    flds (%esp)
 ; X86-NEXT:    popl %eax
@@ -63,8 +60,7 @@ define float @test_with_nneg(i32 %x) nounwind {
 ;
 ; X64-LABEL: test_with_nneg:
 ; X64:       ## %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
 ; X64-NEXT:    retq
   %r = uitofp nneg i32 %x to float
   ret float %r
@@ -73,24 +69,12 @@ define float @test_with_nneg(i32 %x) nounwind {
 define <4 x float> @test_with_nneg_vec(<4 x i32> %x) nounwind {
 ; X86-LABEL: test_with_nneg_vec:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; X86-NEXT:    pand %xmm0, %xmm1
-; X86-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-NEXT:    psrld $16, %xmm0
-; X86-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT:    addps %xmm1, %xmm0
+; X86-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_with_nneg_vec:
 ; X64:       ## %bb.0:
-; X64-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; X64-NEXT:    pand %xmm0, %xmm1
-; X64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; X64-NEXT:    psrld $16, %xmm0
-; X64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT:    addps %xmm1, %xmm0
+; X64-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %r = uitofp nneg <4 x i32> %x to <4 x float>
   ret <4 x float> %r