[llvm] 0c95516 - [X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (#126508)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 21:14:44 PST 2025
Author: Phoebe Wang
Date: 2025-02-11T13:14:41+08:00
New Revision: 0c955167c241fd64f3a6737c06fa54de46165cac
URL: https://github.com/llvm/llvm-project/commit/0c955167c241fd64f3a6737c06fa54de46165cac
DIFF: https://github.com/llvm/llvm-project/commit/0c955167c241fd64f3a6737c06fa54de46165cac.diff
LOG: [X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (#126508)
Found while addressing a review comment on #126477.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-lrint.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 995b4de12ce12c2..8c28985c8e8e7da 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1159,6 +1159,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
+ setOperationAction(ISD::LRINT, MVT::v2i32, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
@@ -34029,6 +34030,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::LRINT:
+ if (N->getValueType(0) == MVT::v2i32) {
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() == MVT::v2f64)
+ Results.push_back(DAG.getNode(X86ISD::CVTP2SI, dl, MVT::v4i32, Src));
+ return;
+ }
+ [[fallthrough]];
case ISD::LLRINT: {
if (SDValue V = LRINT_LLRINTHelper(N, DAG))
Results.push_back(V);
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 3612205bf1bfa9c..b1c8d46f497f320 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -269,31 +269,17 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; X86-SSE2-LABEL: lrint_v2f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: lrint_v2f64:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-AVX-NEXT: vcvtsd2si %xmm1, %eax
-; X86-AVX-NEXT: vcvtsd2si %xmm0, %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X86-AVX-NEXT: vcvtpd2dq %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-AVX-i32-LABEL: lrint_v2f64:
; X64-AVX-i32: # %bb.0:
-; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-AVX-i32-NEXT: vcvtsd2si %xmm1, %eax
-; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %ecx
-; X64-AVX-i32-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT: vcvtpd2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT: retq
;
; X64-AVX1-i64-LABEL: lrint_v2f64:
@@ -328,20 +314,9 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; X86-SSE2-LABEL: lrint_v4f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm2
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: lrint_v4f64:
@@ -411,34 +386,12 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
-; X86-SSE2-NEXT: movapd %xmm0, %xmm3
-; X86-SSE2-NEXT: movapd 8(%ebp), %xmm4
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm5
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
-; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm3
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm2
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE2-NEXT: cvtpd2dq %xmm2, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq 8(%ebp), %xmm2
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
More information about the llvm-commits mailing list