[llvm] [X86] Combine FRINT + FP_TO_SINT to LRINT (PR #126477)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 9 23:17:04 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
Based on Craig's suggestion on #<!-- -->126217
---
Full diff: https://github.com/llvm/llvm-project/pull/126477.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+14)
- (added) llvm/test/CodeGen/X86/rint-conv.ll (+105)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 744e4e740cb2102..615832d6f787cea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2684,6 +2684,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::ZERO_EXTEND_VECTOR_INREG,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
+ ISD::FP_TO_SINT,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
ISD::FP_TO_SINT_SAT,
@@ -56380,6 +56381,18 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 &&
+ Src->getFlags().hasNoNaNs() && Src->getFlags().hasNoInfs() &&
+ Src.hasOneUse())
+ return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));
+
+ return SDValue();
+}
+
// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -59405,6 +59418,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return combineUIntToFP(N, DAG, Subtarget);
+ case ISD::FP_TO_SINT: return combineFPToSInt(N, DAG, Subtarget);
case ISD::LRINT:
case ISD::LLRINT: return combineLRINT_LLRINT(N, DAG, Subtarget);
case ISD::FADD:
diff --git a/llvm/test/CodeGen/X86/rint-conv.ll b/llvm/test/CodeGen/X86/rint-conv.ll
new file mode 100644
index 000000000000000..90698ef3ecd03f5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/rint-conv.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+
+define i32 @no_combine_f32(float %x) nounwind {
+; X86-LABEL: no_combine_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: movss %xmm0, (%esp)
+; X86-NEXT: calll rintf
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: no_combine_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq rintf at PLT
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call float @llvm.rint.f32(float %x)
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @combine_f32(float %x) nounwind {
+; X86-LABEL: combine_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtss2si %xmm0, %eax
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf float @llvm.rint.f32(float %x)
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @no_combine_f64(double %x) nounwind {
+; X86-LABEL: no_combine_f64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: movsd %xmm0, (%esp)
+; X86-NEXT: calll rint
+; X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: no_combine_f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq rint at PLT
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call double @llvm.rint.f64(double %x)
+ %1 = fptosi double %0 to i32
+ ret i32 %1
+}
+
+define i32 @combine_f64(double %x) nounwind {
+; X86-LABEL: combine_f64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsd2si %xmm0, %eax
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf double @llvm.rint.f32(double %x)
+ %1 = fptosi double %0 to i32
+ ret i32 %1
+}
+
+define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: combine_v4f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtps2dq %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_v4f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtps2dq %xmm0, %xmm0
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf <4 x float> @llvm.rint.v4f32(<4 x float> %x)
+ %1 = fptosi <4 x float> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/126477
More information about the llvm-commits
mailing list