[llvm] [X86] Combine FRINT + FP_TO_SINT to LRINT (PR #126477)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 00:59:14 PST 2025
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/126477
From 22255965c0251b47bc36b03745213b4d2b007ada Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 10 Feb 2025 15:13:03 +0800
Subject: [PATCH 1/4] [X86] Combine FRINT + FP_TO_SINT to LRINT
Based on Craig's suggestion on #126217
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++
llvm/test/CodeGen/X86/rint-conv.ll | 105 ++++++++++++++++++++++++
2 files changed, 119 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/rint-conv.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 744e4e740cb21..615832d6f787c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2684,6 +2684,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::ZERO_EXTEND_VECTOR_INREG,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
+ ISD::FP_TO_SINT,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
ISD::FP_TO_SINT_SAT,
@@ -56380,6 +56381,18 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 &&
+ Src->getFlags().hasNoNaNs() && Src->getFlags().hasNoInfs() &&
+ Src.hasOneUse())
+ return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));
+
+ return SDValue();
+}
+
// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -59405,6 +59418,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return combineUIntToFP(N, DAG, Subtarget);
+ case ISD::FP_TO_SINT: return combineFPToSInt(N, DAG, Subtarget);
case ISD::LRINT:
case ISD::LLRINT: return combineLRINT_LLRINT(N, DAG, Subtarget);
case ISD::FADD:
diff --git a/llvm/test/CodeGen/X86/rint-conv.ll b/llvm/test/CodeGen/X86/rint-conv.ll
new file mode 100644
index 0000000000000..90698ef3ecd03
--- /dev/null
+++ b/llvm/test/CodeGen/X86/rint-conv.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+
+define i32 @no_combine_f32(float %x) nounwind {
+; X86-LABEL: no_combine_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: movss %xmm0, (%esp)
+; X86-NEXT: calll rintf
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: no_combine_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq rintf@PLT
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call float @llvm.rint.f32(float %x)
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @combine_f32(float %x) nounwind {
+; X86-LABEL: combine_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtss2si %xmm0, %eax
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf float @llvm.rint.f32(float %x)
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @no_combine_f64(double %x) nounwind {
+; X86-LABEL: no_combine_f64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: movsd %xmm0, (%esp)
+; X86-NEXT: calll rint
+; X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: no_combine_f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq rint@PLT
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call double @llvm.rint.f64(double %x)
+ %1 = fptosi double %0 to i32
+ ret i32 %1
+}
+
+define i32 @combine_f64(double %x) nounwind {
+; X86-LABEL: combine_f64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsd2si %xmm0, %eax
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf double @llvm.rint.f32(double %x)
+ %1 = fptosi double %0 to i32
+ ret i32 %1
+}
+
+define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: combine_v4f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtps2dq %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_v4f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtps2dq %xmm0, %xmm0
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf <4 x float> @llvm.rint.v4f32(<4 x float> %x)
+ %1 = fptosi <4 x float> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
From 9a70a3e8d3becabf65b5195a1a2aff830f5c8d0e Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 10 Feb 2025 16:08:42 +0800
Subject: [PATCH 2/4] Remove fast math constraint
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 -
llvm/test/CodeGen/X86/rint-conv.ll | 60 ++-----------------------
2 files changed, 3 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 615832d6f787c..d540fb38faec6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -56386,7 +56386,6 @@ static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 &&
- Src->getFlags().hasNoNaNs() && Src->getFlags().hasNoInfs() &&
Src.hasOneUse())
return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));
diff --git a/llvm/test/CodeGen/X86/rint-conv.ll b/llvm/test/CodeGen/X86/rint-conv.ll
index 90698ef3ecd03..6eb5678ade572 100644
--- a/llvm/test/CodeGen/X86/rint-conv.ll
+++ b/llvm/test/CodeGen/X86/rint-conv.ll
@@ -2,31 +2,6 @@
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
-define i32 @no_combine_f32(float %x) nounwind {
-; X86-LABEL: no_combine_f32:
-; X86: # %bb.0: # %entry
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: movss %xmm0, (%esp)
-; X86-NEXT: calll rintf
-; X86-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: retl
-;
-; X64-LABEL: no_combine_f32:
-; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
-; X64-NEXT: callq rintf@PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
-entry:
- %0 = tail call float @llvm.rint.f32(float %x)
- %1 = fptosi float %0 to i32
- ret i32 %1
-}
-
define i32 @combine_f32(float %x) nounwind {
; X86-LABEL: combine_f32:
; X86: # %bb.0: # %entry
@@ -38,40 +13,11 @@ define i32 @combine_f32(float %x) nounwind {
; X64-NEXT: cvtss2si %xmm0, %eax
; X64-NEXT: retq
entry:
- %0 = tail call nnan ninf float @llvm.rint.f32(float %x)
+ %0 = tail call float @llvm.rint.f32(float %x)
%1 = fptosi float %0 to i32
ret i32 %1
}
-define i32 @no_combine_f64(double %x) nounwind {
-; X86-LABEL: no_combine_f64:
-; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT: movsd %xmm0, (%esp)
-; X86-NEXT: calll rint
-; X86-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
-; X86-NEXT: retl
-;
-; X64-LABEL: no_combine_f64:
-; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
-; X64-NEXT: callq rint@PLT
-; X64-NEXT: cvttsd2si %xmm0, %eax
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
-entry:
- %0 = tail call double @llvm.rint.f64(double %x)
- %1 = fptosi double %0 to i32
- ret i32 %1
-}
-
define i32 @combine_f64(double %x) nounwind {
; X86-LABEL: combine_f64:
; X86: # %bb.0: # %entry
@@ -83,7 +29,7 @@ define i32 @combine_f64(double %x) nounwind {
; X64-NEXT: cvtsd2si %xmm0, %eax
; X64-NEXT: retq
entry:
- %0 = tail call nnan ninf double @llvm.rint.f32(double %x)
+ %0 = tail call double @llvm.rint.f32(double %x)
%1 = fptosi double %0 to i32
ret i32 %1
}
@@ -99,7 +45,7 @@ define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
; X64-NEXT: cvtps2dq %xmm0, %xmm0
; X64-NEXT: retq
entry:
- %0 = tail call nnan ninf <4 x float> @llvm.rint.v4f32(<4 x float> %x)
+ %0 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %x)
%1 = fptosi <4 x float> %0 to <4 x i32>
ret <4 x i32> %1
}
From f7eb84b1a206d79c3fb57c913c6e4ef10e7ab5ad Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 10 Feb 2025 20:54:22 +0800
Subject: [PATCH 3/4] Add v2f64 / v4f64 cases and AVX test coverage
---
llvm/test/CodeGen/X86/rint-conv.ll | 101 +++++++++++++++++++++++++++++
1 file changed, 101 insertions(+)
diff --git a/llvm/test/CodeGen/X86/rint-conv.ll b/llvm/test/CodeGen/X86/rint-conv.ll
index 6eb5678ade572..1e80cd736978b 100644
--- a/llvm/test/CodeGen/X86/rint-conv.ll
+++ b/llvm/test/CodeGen/X86/rint-conv.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
define i32 @combine_f32(float %x) nounwind {
; X86-LABEL: combine_f32:
@@ -12,6 +13,11 @@ define i32 @combine_f32(float %x) nounwind {
; X64: # %bb.0: # %entry
; X64-NEXT: cvtss2si %xmm0, %eax
; X64-NEXT: retq
+;
+; AVX-LABEL: combine_f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtss2si %xmm0, %eax
+; AVX-NEXT: retq
entry:
%0 = tail call float @llvm.rint.f32(float %x)
%1 = fptosi float %0 to i32
@@ -28,6 +34,11 @@ define i32 @combine_f64(double %x) nounwind {
; X64: # %bb.0: # %entry
; X64-NEXT: cvtsd2si %xmm0, %eax
; X64-NEXT: retq
+;
+; AVX-LABEL: combine_f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtsd2si %xmm0, %eax
+; AVX-NEXT: retq
entry:
%0 = tail call double @llvm.rint.f32(double %x)
%1 = fptosi double %0 to i32
@@ -44,8 +55,98 @@ define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
; X64: # %bb.0: # %entry
; X64-NEXT: cvtps2dq %xmm0, %xmm0
; X64-NEXT: retq
+;
+; AVX-LABEL: combine_v4f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtps2dq %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%0 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %x)
%1 = fptosi <4 x float> %0 to <4 x i32>
ret <4 x i32> %1
}
+
+define <2 x i32> @combine_v2f64(<2 x double> %x) nounwind {
+; X86-LABEL: combine_v2f64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtsd2si %xmm0, %eax
+; X86-NEXT: movd %eax, %xmm1
+; X86-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; X86-NEXT: cvtsd2si %xmm0, %eax
+; X86-NEXT: movd %eax, %xmm0
+; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-NEXT: movdqa %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_v2f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsd2si %xmm0, %eax
+; X64-NEXT: movd %eax, %xmm1
+; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: cvtsd2si %xmm0, %eax
+; X64-NEXT: movd %eax, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movdqa %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; AVX-LABEL: combine_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT: vcvtsd2si %xmm1, %eax
+; AVX-NEXT: vcvtsd2si %xmm0, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm0
+; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %x)
+ %1 = fptosi <2 x double> %0 to <2 x i32>
+ ret <2 x i32> %1
+}
+
+define <4 x i32> @combine_v4f64(<4 x double> %x) nounwind {
+; X86-LABEL: combine_v4f64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtsd2si %xmm1, %eax
+; X86-NEXT: movd %eax, %xmm2
+; X86-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; X86-NEXT: cvtsd2si %xmm1, %eax
+; X86-NEXT: movd %eax, %xmm1
+; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-NEXT: cvtsd2si %xmm0, %eax
+; X86-NEXT: movd %eax, %xmm1
+; X86-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; X86-NEXT: cvtsd2si %xmm0, %eax
+; X86-NEXT: movd %eax, %xmm0
+; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-NEXT: movdqa %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_v4f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsd2si %xmm1, %eax
+; X64-NEXT: movd %eax, %xmm2
+; X64-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; X64-NEXT: cvtsd2si %xmm1, %eax
+; X64-NEXT: movd %eax, %xmm1
+; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT: cvtsd2si %xmm0, %eax
+; X64-NEXT: movd %eax, %xmm1
+; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: cvtsd2si %xmm0, %eax
+; X64-NEXT: movd %eax, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X64-NEXT: movdqa %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; AVX-LABEL: combine_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtpd2dq %ymm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+entry:
+ %0 = tail call <4 x double> @llvm.rint.v4f64(<4 x double> %x)
+ %1 = fptosi <4 x double> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
From 114d435170005851d3009e29fc7dbd627f8b27b7 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 17 Feb 2025 16:58:57 +0800
Subject: [PATCH 4/4] Check SSE2
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 876417ff456bb..20227a181788a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -56405,8 +56405,8 @@ static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
- if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 &&
- Src.hasOneUse())
+ if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::FRINT &&
+ VT.getScalarType() == MVT::i32 && Src.hasOneUse())
return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));
return SDValue();
More information about the llvm-commits
mailing list