[llvm] bb01387 - [X86] Add custom widening for v2f64->v2i32 strict_fp_to_uint with avx512f, but not avx512vl.

Thu Dec 26 12:42:47 PST 2019

Author: Craig Topper
Date: 2019-12-26T12:42:27-08:00
New Revision: bb0138729b8d4af3d8bd888c821ff79f1350f045

URL: https://github.com/llvm/llvm-project/commit/bb0138729b8d4af3d8bd888c821ff79f1350f045
DIFF: https://github.com/llvm/llvm-project/commit/bb0138729b8d4af3d8bd888c821ff79f1350f045.diff

LOG: [X86] Add custom widening for v2f64->v2i32 strict_fp_to_uint with avx512f, but not avx512vl.

AVX512F added instruction for vector fp_to_uint conversions. With
AVX512VL we can use a specific instruction that does v2f64->v4i32 with
zeroes in the 2 extra elements. For non-strict nodes without AVX512VL
we relied on type legalization to turn it to v4f64->v4i32 which would
later be widened by op legalization to v8f64->v8i32. But type legalization
doesn't currently widen strict nodes since it doesn't know how to
safely and efficiently pad the extra elements. But for X86 we know
padding with zeroes is safe and efficient so do that ourselves.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
    llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9b71fabda231..ac001f1e9f03 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28706,23 +28706,32 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
              "Unexpected type action!");
       if (Src.getValueType() == MVT::v2f64) {
+        unsigned Opc;
+        if (IsStrict)
+          Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+        else
+          Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+
+        // If we have VLX we can emit a target specific FP_TO_UINT node,.
         if (!IsSigned && !Subtarget.hasVLX()) {
-          // If we have VLX we can emit a target specific FP_TO_UINT node,
-          // otherwise we can defer to the generic legalizer which will widen
+          // Otherwise we can defer to the generic legalizer which will widen
           // the input as well. This will be further widened during op
           // legalization to v8i32<-v8f64.
-          return;
+          // For strict nodes we'll need to widen ourselves.
+          // FIXME: Fix the type legalizer to safely widen strict nodes?
+          if (!IsStrict)
+            return;
+          Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src,
+                            DAG.getConstantFP(0.0, dl, MVT::v2f64));
+          Opc = N->getOpcode();
         }
         SDValue Res;
         SDValue Chain;
         if (IsStrict) {
-          unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI
-                                  : X86ISD::STRICT_CVTTP2UI;
           Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other},
                             {N->getOperand(0), Src});
           Chain = Res.getValue(1);
         } else {
-          unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
           Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
         }
         Results.push_back(Res);

diff  --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
index 6cc4e703394e..82585d440878 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
@@ -1148,11 +1148,10 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
 ;
 ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512F-NEXT:    vcvttsd2usi %xmm1, %eax
-; AVX512F-NEXT:    vcvttsd2usi %xmm0, %ecx
-; AVX512F-NEXT:    vmovd %ecx, %xmm0
-; AVX512F-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
+; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
@@ -1162,11 +1161,10 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
 ;
 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512DQ-NEXT:    vcvttsd2usi %xmm1, %eax
-; AVX512DQ-NEXT:    vcvttsd2usi %xmm0, %ecx
-; AVX512DQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512DQ-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
+; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:

diff  --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 7d5f039b27fb..3c3e3ed6127d 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -4904,10 +4904,10 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() #0 {
 ;
 ; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f64:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %eax
-; AVX512-NEXT:    vmovd %eax, %xmm0
-; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %eax
-; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,0.0E+0,0.0E+0]
+; AVX512-NEXT:    vcvttpd2udq %zmm0, %ymm0
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 entry:
   %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(