[llvm] 353fbeb - [DAGCombiner] Simplifying `{si|ui}tofp` when only signbit is needed
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 15:17:52 PDT 2024
Author: Noah Goldstein
Date: 2024-03-19T17:17:35-05:00
New Revision: 353fbeb0a294d2c7cef6d88607fa0fd50ee81462
URL: https://github.com/llvm/llvm-project/commit/353fbeb0a294d2c7cef6d88607fa0fd50ee81462
DIFF: https://github.com/llvm/llvm-project/commit/353fbeb0a294d2c7cef6d88607fa0fd50ee81462.diff
LOG: [DAGCombiner] Simplifying `{si|ui}tofp` when only signbit is needed
If only the sign bit is demanded, `uitofp` simplifies to 0, and `sitofp`
simplifies to a `bitcast` (when the source and result scalar widths match).
Closes #85138
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
llvm/test/CodeGen/X86/int-to-fp-demanded.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 57f8fc409de453..16069c6c0dc317 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -611,6 +611,25 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
return false;
}
+static SDValue simplifyUseOfIntToFP(SDValue Op, const APInt &DemandedBits,
+ SelectionDAG &DAG) {
+ unsigned Opc = Op.getOpcode();
+ assert((Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP) &&
+ "Invalid Int -> FP Opcode");
+ if (!DemandedBits.isSignMask())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (Opc == ISD::UINT_TO_FP)
+ return DAG.getConstant(0, SDLoc(Op), VT);
+
+ EVT InnerVT = Op.getOperand(0).getValueType();
+ if (VT.getScalarSizeInBits() == InnerVT.getScalarSizeInBits())
+ return DAG.getBitcast(VT, Op.getOperand(0));
+
+ return SDValue();
+}
+
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -816,6 +835,11 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
}
break;
}
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ if (SDValue R = simplifyUseOfIntToFP(Op, DemandedBits, DAG))
+ return R;
+ break;
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
SDValue Op0 = Op.getOperand(0);
@@ -2313,6 +2337,12 @@ bool TargetLowering::SimplifyDemandedBits(
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ if (SDValue R = simplifyUseOfIntToFP(Op, DemandedBits, TLO.DAG))
+ return TLO.CombineTo(Op, R);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
index cbb5bd09c2399a..a332b3e8908003 100644
--- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -164,14 +164,13 @@ define <4 x float> @demandedbits_sitofp_blendvps(<4 x float> %a0, <4 x float> %a
; SSE-LABEL: demandedbits_sitofp_blendvps:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm3
-; SSE-NEXT: cvtdq2ps %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: demandedbits_sitofp_blendvps:
; AVX: # %bb.0:
-; AVX-NEXT: vcvtdq2ps %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = sitofp <4 x i32> %a2 to <4 x float>
diff --git a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
index cdde03fb0534b9..8652136ae5cd94 100644
--- a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
+++ b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
@@ -7,19 +7,13 @@ declare void @use.i32(i32)
define i32 @sitofp_signbit_only(i32 %i_in) nounwind {
; X86-LABEL: sitofp_signbit_only:
; X86: # %bb.0:
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, (%esp)
-; X86-NEXT: fildl (%esp)
-; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
;
; X64-LABEL: sitofp_signbit_only:
; X64: # %bb.0:
-; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: movmskps %xmm0, %eax
; X64-NEXT: shll $31, %eax
; X64-NEXT: retq
@@ -44,8 +38,8 @@ define i32 @sitofp_signbit_only_okay_width(i16 %i_in) nounwind {
;
; X64-LABEL: sitofp_signbit_only_okay_width:
; X64: # %bb.0:
-; X64-NEXT: movswl %di, %eax
-; X64-NEXT: cvtsi2ss %eax, %xmm0
+; X64-NEXT: shll $16, %edi
+; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: movmskps %xmm0, %eax
; X64-NEXT: shll $31, %eax
; X64-NEXT: retq
@@ -82,15 +76,14 @@ define <2 x i16> @sitofp_signbit_only_fail_bad_width2(i32 %i_in) nounwind {
; X86-LABEL: sitofp_signbit_only_fail_bad_width2:
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: fildl (%esp)
; X86-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
; X86-NEXT: shrl $16, %edx
-; X86-NEXT: andl $32768, %eax # imm = 0x8000
; X86-NEXT: andl $32768, %edx # imm = 0x8000
+; X86-NEXT: movl $32768, %eax # imm = 0x8000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: # kill: def $dx killed $dx killed $edx
; X86-NEXT: addl $8, %esp
More information about the llvm-commits
mailing list