[llvm] 0c95516 - [X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (#126508)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 10 21:14:44 PST 2025


Author: Phoebe Wang
Date: 2025-02-11T13:14:41+08:00
New Revision: 0c955167c241fd64f3a6737c06fa54de46165cac

URL: https://github.com/llvm/llvm-project/commit/0c955167c241fd64f3a6737c06fa54de46165cac
DIFF: https://github.com/llvm/llvm-project/commit/0c955167c241fd64f3a6737c06fa54de46165cac.diff

LOG: [X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (#126508)

Found while addressing a review comment on #126477.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-lrint.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 995b4de12ce12c2..8c28985c8e8e7da 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1159,6 +1159,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FCOPYSIGN,          MVT::v2f64, Custom);
 
     setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
+    setOperationAction(ISD::LRINT, MVT::v2i32, Custom);
 
     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
       setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
@@ -34029,6 +34030,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::LRINT:
+    if (N->getValueType(0) == MVT::v2i32) {
+      SDValue Src = N->getOperand(0);
+      if (Src.getValueType() == MVT::v2f64)
+        Results.push_back(DAG.getNode(X86ISD::CVTP2SI, dl, MVT::v4i32, Src));
+      return;
+    }
+    [[fallthrough]];
   case ISD::LLRINT: {
     if (SDValue V = LRINT_LLRINTHelper(N, DAG))
       Results.push_back(V);

diff  --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 3612205bf1bfa9c..b1c8d46f497f320 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -269,31 +269,17 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
 define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; X86-SSE2-LABEL: lrint_v2f64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm1
-; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT:    cvtpd2dq %xmm0, %xmm0
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: lrint_v2f64:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-AVX-NEXT:    vcvtsd2si %xmm1, %eax
-; X86-AVX-NEXT:    vcvtsd2si %xmm0, %ecx
-; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; X86-AVX-NEXT:    vcvtpd2dq %xmm0, %xmm0
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-AVX-i32-LABEL: lrint_v2f64:
 ; X64-AVX-i32:       # %bb.0:
-; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-AVX-i32-NEXT:    vcvtsd2si %xmm1, %eax
-; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %ecx
-; X64-AVX-i32-NEXT:    vmovd %ecx, %xmm0
-; X64-AVX-i32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT:    vcvtpd2dq %xmm0, %xmm0
 ; X64-AVX-i32-NEXT:    retq
 ;
 ; X64-AVX1-i64-LABEL: lrint_v2f64:
@@ -328,20 +314,9 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
 define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; X86-SSE2-LABEL: lrint_v4f64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm2
-; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm1
-; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm1
-; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT:    cvtpd2dq %xmm1, %xmm1
+; X86-SSE2-NEXT:    cvtpd2dq %xmm0, %xmm0
+; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: lrint_v4f64:
@@ -411,34 +386,12 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    andl $-16, %esp
 ; X86-SSE2-NEXT:    subl $16, %esp
-; X86-SSE2-NEXT:    movapd %xmm0, %xmm3
-; X86-SSE2-NEXT:    movapd 8(%ebp), %xmm4
-; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm5
-; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm1
-; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
-; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm3
-; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm1
-; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm1
-; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
-; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm2
-; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; X86-SSE2-NEXT:    cvtpd2dq %xmm1, %xmm1
+; X86-SSE2-NEXT:    cvtpd2dq %xmm0, %xmm0
+; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE2-NEXT:    cvtpd2dq %xmm2, %xmm1
+; X86-SSE2-NEXT:    cvtpd2dq 8(%ebp), %xmm2
+; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4


        


More information about the llvm-commits mailing list