[llvm] [X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (PR #126508)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 04:53:57 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
Found while addressing a review comment on #<!-- -->126477.
---
Full diff: https://github.com/llvm/llvm-project/pull/126508.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+8)
- (modified) llvm/test/CodeGen/X86/vector-lrint.ll (+12-59)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 744e4e740cb2102..72f6fd7a96c3d28 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1159,6 +1159,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
+ setOperationAction(ISD::LRINT, MVT::v2i32, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
@@ -34017,6 +34018,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::LRINT:
+ if (N->getValueType(0) == MVT::v2i32) {
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() == MVT::v2f64)
+ Results.push_back(DAG.getNode(X86ISD::CVTP2SI, dl, MVT::v4i32, Src));
+ return;
+ }
+ [[fallthrough]];
case ISD::LLRINT: {
if (SDValue V = LRINT_LLRINTHelper(N, DAG))
Results.push_back(V);
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 3612205bf1bfa9c..b1c8d46f497f320 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -269,31 +269,17 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; X86-SSE2-LABEL: lrint_v2f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: lrint_v2f64:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-AVX-NEXT: vcvtsd2si %xmm1, %eax
-; X86-AVX-NEXT: vcvtsd2si %xmm0, %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X86-AVX-NEXT: vcvtpd2dq %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-AVX-i32-LABEL: lrint_v2f64:
; X64-AVX-i32: # %bb.0:
-; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-AVX-i32-NEXT: vcvtsd2si %xmm1, %eax
-; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %ecx
-; X64-AVX-i32-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT: vcvtpd2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT: retq
;
; X64-AVX1-i64-LABEL: lrint_v2f64:
@@ -328,20 +314,9 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; X86-SSE2-LABEL: lrint_v4f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm2
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: lrint_v4f64:
@@ -411,34 +386,12 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
-; X86-SSE2-NEXT: movapd %xmm0, %xmm3
-; X86-SSE2-NEXT: movapd 8(%ebp), %xmm4
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm5
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
-; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm3
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm2
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE2-NEXT: cvtpd2dq %xmm2, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq 8(%ebp), %xmm2
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
``````````
</details>
https://github.com/llvm/llvm-project/pull/126508
More information about the llvm-commits mailing list